diff options
author | root <root@(null).(none)> | 2009-05-03 17:20:00 +0200 |
---|---|---|
committer | root <root@(null).(none)> | 2009-05-03 17:20:00 +0200 |
commit | e0faf22f31c48fb27b43c1825897d26e58feafc4 (patch) | |
tree | 65a09372b31e08a3a865bd0a88cd2718bafcd643 /ufs |
This is my initial working version.
Known bug in this version: the subhurd sometimes fails to boot.
Diffstat (limited to 'ufs')
-rw-r--r-- | ufs/CVS/Entries | 19 | ||||
-rw-r--r-- | ufs/CVS/Repository | 1 | ||||
-rw-r--r-- | ufs/CVS/Root | 1 | ||||
-rw-r--r-- | ufs/CVS/Tag | 1 | ||||
-rw-r--r-- | ufs/ChangeLog | 2012 | ||||
-rw-r--r-- | ufs/Makefile | 32 | ||||
-rw-r--r-- | ufs/alloc.c | 1703 | ||||
-rw-r--r-- | ufs/bmap.c | 120 | ||||
-rw-r--r-- | ufs/consts.c | 33 | ||||
-rw-r--r-- | ufs/dinode.h | 137 | ||||
-rw-r--r-- | ufs/dir.c | 988 | ||||
-rw-r--r-- | ufs/dir.h | 163 | ||||
-rw-r--r-- | ufs/fs.h | 509 | ||||
-rw-r--r-- | ufs/hyper.c | 414 | ||||
-rw-r--r-- | ufs/inode.c | 703 | ||||
-rw-r--r-- | ufs/main.c | 210 | ||||
-rw-r--r-- | ufs/pager.c | 806 | ||||
-rw-r--r-- | ufs/pokeloc.c | 85 | ||||
-rw-r--r-- | ufs/sizes.c | 719 | ||||
-rw-r--r-- | ufs/subr.c | 264 | ||||
-rw-r--r-- | ufs/tables.c | 138 | ||||
-rw-r--r-- | ufs/ufs.h | 289 |
22 files changed, 9347 insertions, 0 deletions
diff --git a/ufs/CVS/Entries b/ufs/CVS/Entries new file mode 100644 index 00000000..f8342b30 --- /dev/null +++ b/ufs/CVS/Entries @@ -0,0 +1,19 @@ +/ChangeLog/1.50/Tue Nov 13 23:32:45 2007//Tzhengda-soc2008-virt-branch +/Makefile/1.53/Thu Mar 14 21:11:37 2002//Tzhengda-soc2008-virt-branch +/alloc.c/1.24/Tue Jun 11 21:41:03 2002//Tzhengda-soc2008-virt-branch +/bmap.c/1.8/Tue Apr 23 18:09:23 1996//Tzhengda-soc2008-virt-branch +/consts.c/1.9/Wed Oct 13 19:37:29 1999//Tzhengda-soc2008-virt-branch +/dinode.h/1.4/Fri Jul 15 17:36:29 1994//Tzhengda-soc2008-virt-branch +/dir.c/1.47/Fri Oct 5 10:00:44 2007//Tzhengda-soc2008-virt-branch +/dir.h/1.8/Tue Mar 14 23:26:37 2006//Tzhengda-soc2008-virt-branch +/fs.h/1.5/Fri Jul 15 17:45:34 1994//Tzhengda-soc2008-virt-branch +/hyper.c/1.36/Mon Jan 8 22:33:11 2001//Tzhengda-soc2008-virt-branch +/inode.c/1.63/Tue Nov 13 23:32:45 2007//Tzhengda-soc2008-virt-branch +/main.c/1.50/Wed May 8 09:59:52 2002//Tzhengda-soc2008-virt-branch +/pager.c/1.48/Mon Sep 13 06:35:07 1999//Tzhengda-soc2008-virt-branch +/pokeloc.c/1.6/Mon Sep 23 17:25:16 1996//Tzhengda-soc2008-virt-branch +/sizes.c/1.40/Mon Sep 13 06:35:11 1999//Tzhengda-soc2008-virt-branch +/subr.c/1.8/Tue Sep 27 01:30:00 1994//Tzhengda-soc2008-virt-branch +/tables.c/1.4/Thu Sep 1 15:39:31 1994//Tzhengda-soc2008-virt-branch +/ufs.h/1.37/Mon Oct 1 16:37:27 2001//Tzhengda-soc2008-virt-branch +D diff --git a/ufs/CVS/Repository b/ufs/CVS/Repository new file mode 100644 index 00000000..20bcdd2f --- /dev/null +++ b/ufs/CVS/Repository @@ -0,0 +1 @@ +hurd/ufs diff --git a/ufs/CVS/Root b/ufs/CVS/Root new file mode 100644 index 00000000..a10aa66d --- /dev/null +++ b/ufs/CVS/Root @@ -0,0 +1 @@ +:ext:zhengda@cvs.savannah.gnu.org:/sources/hurd diff --git a/ufs/CVS/Tag b/ufs/CVS/Tag new file mode 100644 index 00000000..7e454c6d --- /dev/null +++ b/ufs/CVS/Tag @@ -0,0 +1 @@ +Tzhengda-soc2008-virt-branch diff --git a/ufs/ChangeLog b/ufs/ChangeLog new file mode 100644 index 00000000..14fb86bd --- 
/dev/null +++ b/ufs/ChangeLog @@ -0,0 +1,2012 @@ +2007-11-13 Thomas Schwinge <tschwinge@gnu.org> + + * inode.c (read_disknode, write_node): Adapt to ``struct stat'' + changes. + +2007-08-16 Samuel Thibault <samuel.thibault@ens-lyon.org> + + * dir.c (diskfs_lookup_hard, diskfs_dirempty): Call + diskfs_set_node_atime instead of setting dp->dn_set_atime. + * inode.c (read_symlink_hook): Likewise. + +2006-03-15 Thomas Schwinge <tschwinge@gnu.org> + + * dir.h (DIRECT_NAMELEN): Don't use ?: as a lvalue. + +2002-10-03 Roland McGrath <roland@frob.com> + + * dir.h (MAXNAMLEN): #undef before defining. + +2002-07-31 Roland McGrath <roland@frob.com> + + * dir.c (diskfs_direnter_hard): Fix test in last change. + +2002-06-08 Roland McGrath <roland@frob.com> + + * inode.c (diskfs_cached_lookup): Use ino_t for argument. + + * dir.c (diskfs_direnter_hard): Use size_t for OLDSIZE. + Fail with EOVERFLOW when it would exceed that width. + + * alloc.c, dir.c: Use %Ld format for ino_t values. + * alloc.c (diskfs_alloc_node): Use %Ld format for blkcnt_t values. + +2002-05-08 Roland McGrath <roland@frob.com> + + * main.c (diskfs_append_args): Fix argument type. + (main): Use %z format modifier for size_t arg. + * dir.c (dirscanblock): Use %z format modifier for vm_address_t arg. + (diskfs_dirempty): int -> vm_address_t + (count_dirents): int -> size_t + (diskfs_get_directs): u_int -> size_t + +2002-03-11 Roland McGrath <roland@frob.com> + + * Makefile (ufs.static): Depend on + $(boot-store-types:%=../libstore/libstore_%.a). + +2001-11-21 Roland McGrath <roland@frob.com> + + * inode.c (read_disknode): Just always call getpid for the fsid value. + +2001-10-01 Marcus Brinkmann <marcus@gnu.org> + + * ufs.h (swab_long_long): Use LL, not lL, for constant. + Submitted by Maurizio Boriani <baux@debian.org>. + +2001-08-09 Roland McGrath <roland@frob.com> + + * inode.c (diskfs_get_translator): Fail with EFTYPE if the length + field stored on disk is unreasonable. Don't crash on ENOMEM. 
+ Use memcpy instead of bcopy. + +2001-06-09 Mark Kettenis <kettenis@gnu.org> + + * inode.c (diskfs_set_statfs): If number of free blocks is less + than the number of reserved blocks, set the number of available + blocks to 0. + +2001-01-08 Marcus Brinkmann <marcus@gnu.org> + + * main.c (main): Use %Ld instead %ld to print store->size. + * hyper.c (get_hypermetadata): Likewise. + * inode.c (diskfs_S_file_get_storage_info): Change type of variables + start and length from off_t to store_offset_t. + +2001-01-07 Marcus Brinkmann <marcus@gnu.org> + + * dir.c: Make diskfs_dirstat_size const. + +2000-12-02 Roland McGrath <roland@frob.com> + + * inode.c (write_node): Remove assert that dn_set_mtime et al are + clear. It is ok if they are set in parallel, because the latter + setting will be carried out eventually. + +2000-11-30 Marcus Brinkmann <marcus@gnu.org> + + * dir.c (diskfs_lookup_hard): If name is too long, clear + DS before returning ENAMETOOLONG. + +2000-07-26 Mark Kettenis <kettenis@gnu.org> + + * Makefile (HURDLIBS): Reorder libs such that the threads lib + comes before the ports lib. This makes sure the functions in + libthreads properly override the stubs in libports with the new + dynamic linker semantics in glibc 2.2. + +2000-03-03 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c (diskfs_get_directs): Don't allocate buffer for *DATA until + after scanning for ENTRY and possibly returning EOF. + +1999-10-13 Roland McGrath <roland@baalperazim.frob.com> + + * consts.c (diskfs_name_max): New variable. + +1999-09-13 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c, sizes.c, pager.c: Reverted changes related to io_map_segment. + +1999-09-09 Roland McGrath <roland@baalperazim.frob.com> + + * Makefile (makemode): servers -> server. + (targets): Replaced with target; remove ufs.static. + (ufs.static-LDFLAGS): Variable removed. + (ufs.static, ufs): Remove deps. 
+ +1999-09-08 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_get_directs): Initialize `err' to shut gcc up. + +1999-09-07 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_lookup_hard): Pass additional parameter to + diskfs_get_filemap. + (diskfs_dirempty): Likewise. + * sizes.c (diskfs_truncate): Likewise. + (block_extended): Likewise. + (diskfs_grow): Likewise. + * pager.c (diskfs_get_filemap): Accept additional parameter. + +1999-09-04 Thomas Bushnell, BSG <tb@mit.edu> + + * pager.c (find_address): If !ISREAD, then don't return errors for + access past NP->allocsize, and clear *ADDR and *DISKSIZE. These + can happen through harmless races against truncate. + (pager_write_page): Don't print annoying messages for writes to + unallocated disk. These can happen through harmless races against + truncate, and so we should not pester the console with them. + +1999-07-10 Roland McGrath <roland@baalperazim.frob.com> + + * ufs.h: Add #include <sys/mman.h> for munmap decl. + +1999-07-09 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_get_directs): Use mmap instead of vm_allocate. + * hyper.c (get_hypermetadata): Likewise. + * pager.c (pager_read_page): Likewise. + +1999-07-06 Thomas Bushnell, BSG <tb@mit.edu> + + * hyper.c (diskfs_readonly_changed): Use mprotect instead of + vm_protect. + +Mon Jul 5 20:04:58 1999 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_lookup_hard): Repair typo. Reported by Yamashita + TAKAO <jargon@lares.dti.ne.jp>. + +1999-07-03 Thomas Bushnell, BSG <tb@mit.edu> + + * dir.c (diskfs_lookup_hard): Use munmap instead of + vm_deallocate. + (diskfs_direnter_hard): Likewise. + (diskfs_dirremove_hard): Likewise. + (diskfs_dirrewrite_hard): Likewise. + (diskfs_dirempty): Likewise. + (diskfs_drop_dirstat): Likewise. + (diskfs_get_directs): Likewise. + * sizes.c (block_extended): Likewise. + (poke_pages): Likewise. + * hyper.c (get_hypermetadata): Likewise. + (diskfs_set_hypermetadata): Likewise. 
+ +1999-06-29 Thomas Bushnell, BSG <tb@mit.edu> + + * hyper.c (diskfs_readonly_changed): Adjust whether the store + should permit writes too. + +1999-05-02 Roland McGrath <roland@baalperazim.frob.com> + + * main.c (main): Remove bogus uninitialized variable ERR. + +1999-01-23 Roland McGrath <roland@baalperazim.frob.com> + + * main.c (main): Use diskfs_init_main. + +1998-12-27 Roland McGrath <roland@baalperazim.frob.com> + + * inode.c (diskfs_set_statfs): Remove __ from struct members. + +1998-12-21 Mark Kettenis <kettenis@phys.uva.nl> + + * inode.c (diskfs_set_statfs): Fill in statfs members that are + used to implement statvfs. + +1998-12-27 Roland McGrath <roland@baalperazim.frob.com> + + * main.c (main): Pass ARGP_IN_ORDER flag to argp_parse because + diskfs options need it. + +1998-12-20 Roland McGrath <roland@baalperazim.frob.com> + + * alloc.c (diskfs_alloc_node): Fix printf format to silence warning. + * hyper.c (get_hypermetadata): Likewise. + +1998-09-04 Roland McGrath <roland@baalperazim.frob.com> + + * dir.c (diskfs_lookup_hard): Fix defn with `const'. + (diskfs_direnter_hard): Likewise. + (dirscanblock): Likewise. + * inode.c (diskfs_create_symlink_hook, create_symlink_hook): Likewise. + (diskfs_set_translator): Likewise. + +Wed Aug 20 14:34:24 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): Cope with error return from + diskfs_get_filemap. + * sizes.c (diskfs_grow): Likewise. + * dir.c (diskfs_dirempty): Cope (poorly) with error return from + diskfs_get_filemap. + * sizes.c (diskfs_truncate): Likewise. + (block_extended): Likewise. + + * pager.c (diskfs_get_filemap): If pager_create fails, return + error to caller. + +Mon Jun 30 17:38:57 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * main.c (main): If the store cannot be made writable, then set + diskfs_hard_readonly and diskfs_readonly. 
+ +1997-06-20 Miles Bader <miles@gnu.ai.mit.edu> + + * hyper.c (diskfs_set_hypermetadata): Adjust device addresses for + possible differences between DEV_BSIZE & device block size. + * inode.c (diskfs_S_file_get_storage_info): Likewise. + * pager.c (pager_read_page, pager_write_page, pager_unlock_page): + Likewise. + * ufs.h (log2_dev_blocks_per_bsize): New declaration. + * main.c (main): Only require device-block-size to be <= DEV_BSIZE. + Get rid of device-block-size-is-power-of-2 check. + Set LOG2_DEV_BLOCKS_PER_BSIZE. + Exit with an error if the disk is too small rather than assert failing. + (log2_dev_blocks_per_bsize): New variable. + +Thu Feb 6 01:56:27 1997 Miles Bader <miles@gnu.ai.mit.edu> + + (diskfs_S_file_getfh, diskfs_S_fsys_getfile): Functions removed. + +Tue Nov 19 18:28:26 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_disknode): If SBLOCK->fs_inodefmt < FS_44INODEFMT, + set ST->st_author to st->st_uid, and NP->author_tracks_uid to true. + (diskfs_validate_author_change): New function. + +Mon Nov 18 17:10:00 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_disknode): When setting ST->st_mode, Clear + S_ITRANS bits, and set S_IPTRANS if necessary. Don't set + NP->istranslated anymore. + (diskfs_set_translator): Frob S_IPTRANS bit in mode bits instead + of NP->istranslated. + (write_node): Don't write any bits in S_ITRANS to disk. + * alloc.c (ffs_alloc): Use S_IPTRANS in NP->dn_stat.st_mode + instead of NP->istranslated. + +Sat Nov 16 17:21:40 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * inode.c (diskfs_S_fsys_getfile): Delete var `fakecred'. + diskfs_access -> fshelp_access. + * alloc.c (ffs_alloc): diskfs_isuid -> idvec_contains. + (ffs_realloccg): Likewise. + +Thu Nov 14 16:43:36 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * inode.c (diskfs_S_file_getfh): diskfs_isuid -> idvec_contains. + (diskfs_S_fsys_getfile): Use idvecs and iousers. 
+ +Thu Oct 24 16:07:17 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (startup_children, runtime_children): New variables. + (startup_parents, runtime_parents): Variables removed. + (startup_argp, runtime_argp): Use new *_CHILDREN variables instead of + corresponding *_PARENT ones. + +Thu Sep 19 18:02:40 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Add store. + +Wed Sep 18 15:30:00 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_S_file_get_storage_info): Narrow scope of RUN. + + * consts.c (diskfs_extra_version): New variable. + + * main.c (main): Remove CLASSES argument to store_parsed_open. + Use STORE_PARAMS variable to get result from parsing STORE_ARGP. + Don't force COMPAT_GNU on bootstrap filesystems (it's the default + anyway). + +Mon Sep 16 13:27:38 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * Makefile (ufs.static ufs): Add ../libstore/libstore.a. + + * hyper.c (zeroblock): Change type to `void *'. + (get_hypermetadata): Cast ZEROBLOCK when vm_{de,}allocating. + * ufs.h (zeroblock): Change type to `void *'. + * sizes.c (diskfs_truncate): Don't cast ZEROBLOCK to diskfs_node_rw. + (block_extended, diskfs_grow): Do cast it to offer_data. + + * main.c (main): Don't set DISKFS_USE_MACH_DEVICE (which is no longer). + + * inode.c (diskfs_S_file_get_storage_info): Coalesce adjacent + blocks when constructing RUNS. + Set *PORTS_TYPE, not *STORAGE_PORT_TYPE. + * inode.c (diskfs_S_file_getfh): Variable ERR removed. + + * sizes.c (indir_release): Use DISKFS_DISK_PAGER instead of DISK_PAGER. + * ufs.h (sync_disk_blocks): Likewise. + * pokeloc.c (sync_disk): Likewise. + * main.c (diskfs_reload_global_state): Likewise. + * pager.c (create_disk_pager, diskfs_shutdown_pager, + diskfs_sync_everything): Likewise. + * main.c <argp.h>, <hurd/store.h>: New includes. + * hyper.c, pager.c, inode.c <hurd/store.h>: New include. + (get_hypermetadata): Use %Zd for printfing size_t. 
+ (diskfs_set_hypermetadata): Return EIO for incomplete writes. + Cast BUF when calling vm_deallocate. + + * dir.c (diskfs_direnter_hard): Initialize OLDSIZE to shut up gcc. + +Sat Sep 14 20:38:47 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * ufs.h (store, store_parsed, disk_image): New declarations. + + * pager.c (thread_function): Function removed. + (create_disk_pager): Create PAGER_BUCKET. + Use diskfs_start_disk_pager instead of disk_pager_setup. + (disk_image): New variable. + + * main.c (store, store_parsed, diskfs_disk_name): New variables. + (parse_opt): Propagate our input to the first child parser. + (diskfs_append_args): New function. + (diskfs_get_options): Function removed. + (startup_parents): Use DISKFS_STORE_STARTUP_ARGP instead of + DISKFS_STD_DEVICE_STARTUP_ARGP. + + * hyper.c (get_hypermetadata): Use DISKFS_DISK_NAME instead of + DISKFS_DEVICE_ARG. + * main.c (main): Likewise. + + * hyper.c (get_hypermetadata, diskfs_readonly_changed): Use + fields in STORE instead of DISKFS_DEVICE_* variables. + * inode.c (diskfs_S_file_get_storage_info): Likewise. + * pager.c (pager_report_extent): Likewise. + * main.c (main): Likewise. + * pager.c (pager_read_page, pager_write_page, pager_unlock_page): + Use store_{read,write} instead of diskfs_device_{read,write}_sync. + * hyper.c (diskfs_set_hypermetadata): Likewise. + * inode.c (diskfs_S_file_get_storage_info): Rewrite to use + libstore functions (still has NDADDR block limit, though). + +Thu Sep 12 16:36:19 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): New variable. + (ufs.static ufs): Depend on $(library_deps) instead of long list + of libraries. + +Fri Sep 6 16:00:42 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * consts.c: Include <version.h>. + (diskfs_major_version, diskfs_minor_version, diskfs_edit_version): + Deleted variables. + (diskfs_server_version): New variable. 
+ +Thu Aug 29 16:07:07 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): When setting ds->stat to EXTEND, set + ds->idx by looking at the size of the file. (IDX itself is no + longer at the end because of the change on Aug 16 1996.) + +Wed Aug 28 12:15:15 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * dir.c (dirscanblock): Size dirents correctly when mallocing it. + (diskfs_direnter_hard): Be more careful when sizing or resizing + dirents. Correctly set to -1 all the new entries we create after + realloc call. + +Fri Aug 16 18:51:31 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * ufs.h (struct disknode): New member `dir_idx'. + * inode.c (diskfs_cached_lookup): Initialize DN->dir_idx. + * dir.c (diskfs_lookup_hard): After successful dirscanblock, + record index where we finished in DP->dn->dir_idx. Start searches + at that index. + +Mon Aug 12 13:43:46 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * hyper.c (diskfs_set_hypermetadata): Bother to return 0 at end of + function. + +Wed Aug 7 13:00:30 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): Compute st->f_blocks correctly; set + bsize to be fs_fsize, not fs_bsize. + + * hyper.c (diskfs_set_hypermetadata): Return an error as + appropriate. + + * inode.c (struct ufs_fhandle): Layout filehandle more like Unixy + NFSD. + (diskfs_S_file_getfh): Bother to clear unused parts of a + file handle so that they always compare equal. + +Tue Aug 6 12:19:38 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * inode.c: Include <fcntl.h>. + (struct ufs_fhandle): New type. + (diskfs_S_fsys_getfile, diskfs_S_file_getfh): New functions. + +Tue Jul 23 15:58:28 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (write_node, read_disknode): `struct timespec' now uses + a field prefix of `tv_'. + +Sat Jul 6 16:14:10 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (ufs_version): Variable removed. 
+ +Sat Jul 6 12:45:36 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c (read_disknode): Don't set allocsize based on st->size + for kludged symlinks. + + * sizes.c (diskfs_truncate): Call record_poke after truncating a + kludged symlink. + +Wed Jul 3 13:27:04 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * main.c: Include <argz.h>. + (startup_parents, runtime_parents): Declare const. + +Tue Jun 25 14:02:02 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (diskfs_get_options): Include `--compat=' in options. + +Mon Jun 24 16:59:12 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): Use diskfs_check_readonly instead of + diskfs_readonly. + (diskfs_dirempty): Likewise. + + * dir.c (diskfs_lookup_hard): Use diskfs_check_readonly instead of + diskfs_readonly. + (diskfs_dirempty): Likewise. + * inode.c (diskfs_cached_lookup): Likewise. + (read_symlink_hook): Likewise. + * sizes.c (diskfs_truncate): Call diskfs_check_readonly. + (diskfs_grow): Likewise. + * hyper.c (diskfs_set_hypermetadata): If CLEAN is not set, make + sure we clear the clean bit on disk. Always call sync_disk (with + appropriate WAIT). + (diskfs_readonly_changed): Don't do set_hypermetadata here. + (copy_sblock): Don't track clean state here. + + * pager.c (diskfs_shutdown_pager): Don't shutdown DISKPAGER ever, + just sync it instead. + +Sat Jun 22 17:45:34 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (diskfs_get_options): New function. + (options): Make const. + +Fri Jun 21 01:32:09 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (parse_opt): Handle runtime invalid selection of 4.2 mode. + Save select mode until we're done to correctly deal with external + errors at runtime. + (startup_parents, startup_argp, runtime_parents, runtime_argp): + New variables. + (main): Argp vars made global. + (argp_parents): diskfs_device_startup_argp --> + &diskfs_std_device_startup_argp. 
+ +Sat Jun 15 13:57:27 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (options): New variable. + (parse_opt): New function. + (main): Parse ufs-specific options too. + <string.h>: New include. + +Fri May 10 09:29:03 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): Fix one reference to old name of ST + member. + +Thu May 9 11:54:13 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (ufs.static ufs): s/ioserver/iohelp/g + * ufs.h: ioserver.h -> iohelp.h. + + * inode.c (diskfs_set_statfs): Use and fill in new statfs + structure. + +Mon May 6 14:23:54 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * main.c (ufs_version): Upgrade to 0.0. + +Fri May 3 09:15:33 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * sizes.c (block_extended): Rewrite code that moves pages + to be more efficient, and not deadlock too, using unlocked + pagein permission feature (read "hack"). Return value + now indicates whether we expect a sync. + (diskfs_grow): If a call to block_extended returns nonzero, + then sync the file before returning. + * pager.c (diskfs_get_filemap): Initialize + UPI->allow_unlocked_pagein and UPI->unlocked_pagein_length. + (unlocked_pagein_lock): New variable. + (find_address): New parameter `isread'; all callers changed. + If ISREAD and we are in the unlocked pagein region, don't + attempt to acquire NP->dn->allocptrlock. + * ufs.h (struct user_pager_info): New members + `allow_unlocked_pagein' and `unlocked_pagein_length'. + (unlocked_pagein_lock): New variable. + +Thu May 2 10:56:10 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * sizes.c (offer_data): Offer pages at ADDR each time through the + loop, not the same page over and over. + (block_extended): When moving data, sync in-core pager both before + reading from disk and after providing data to kernel. + (diskfs_grow): Always call block_extended or offer_data before + adjusting block pointer. 
+ +Tue Apr 30 13:38:42 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * sizes.c (diskfs_grow): In last offer_data, don't offer a block + number as an address. + +Fri Apr 26 15:35:53 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (makemode): Now `servers'. + (targets): Renamed from `target'; now include ufs.static. + (ufs.static-LDFLAGS): Renamed from `LDFLAGS'. + (ufs.static): Depend on same things as `ufs'. + (include ../Makeconf): Must come before dependency information. + +Wed Apr 24 14:05:48 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.h (DIRECT_NAMLEN) [! LITTLE_ENDIAN]: Deal correctly with the + case where it was written on a little endian machine without the + extension. + (DIRECT_NAMLEN) [LITTLE_ENDIAN]: Deal with case correctly where it + was written without the extension on a big endian machine. + * dir.c (dirscanblock): Use read/write_disk_entry when reading or + writing fields from directory entries. + (diskfs_direnter_hard): Likewise. + (diskfs_dirremove_hard): Likewise. + (diskfs_dirrewrite_hard): Likewise. + (diskfs_get_directs): Likewise. + (diskfs_dirempty): Likewise. + (count_dirents): Likewise. + +Tue Apr 23 11:28:42 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_dirempty): node_update -> diskfs_node_update. + + * hyper.c (swab_sblock, swab_csums): New functions. + (get_hypermetadata): If this is a swapped filesystem, set swab_disk. + Also swap csum and sblock after reading them. + (diskfs_set_hypermetadata): If swab_disk, swap the csums back before + writing them. + (copy_sblock): If swab_disk, swap the sblock before writing it. + * ufs.h (swab_disk): New variable. + (swab_short, swab_long, swab_long_long): New functions. + (read_disk_entry, write_disk_entry): New macros. + * alloc.c (ffs_realloccg): Use read/write_disk_entry when + reading/writing on-disk inode fields. + * bmap.c (fetch_indir_spec): Likewise. + * inode.c (read_disknode): Likewise. 
+ (write_node): Likewise. + (diskfs_set_translator): Likewise. + (diskfs_get_translator): Likewise. + (diskfs_S_file_get_storage_info): Likewise. + * sizes.c (diskfs_truncate): Likewise. + (diskfs_grow): Likewise. + * pager.c (pager_unlock_page): Likewise. + * bmap.c (fetch_indir_spec): Use read/write_disk_entry when + reading/writing on-disk indirect blocks. + * sizes.c (diskfs_truncate): Likewise. + (indir_release): Likewise. + (diskfs_grow): Likewise. + * pager.c (pager_unlock_page): Likewise. + * alloc.c: Include <string.h> + (ffs_blkpref): Use read_disk_entry when reading from BAP array. + (swab_cg, read_cg, release_cg): New functions. + (ffs_fragextend, ffs_alloccg, ffs_nodealloccg, ffs_blkfree, + diskfs_free_node): Use new cg access functions. + +Thu Apr 18 14:50:30 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * sizes.c (diskfs_grow): New variable `pagerpt'. + (offer_zeroes, block_extended): New functions. + (diskfs_grow): In initializing newly allocated data disk blocks with + zeroes, use less aggressive offer_zeroes instead of immediate + synchronous writes. After ffs_realloccg succeeds, use + block_extended to handle the magic. Get rid of old poke calls. + * alloc.c (ffs_realloccg): If we are allocating a new block, don't + actually free the old one here. + * sizes.c (diskfs_grow): New variable `pagerpt'. + (offer_zeroes, block_extended): New functions. + (diskfs_grow): In initializing newly allocated data disk blocks + with zeroes, use less aggressive offer_zeroes instead of immediate + synchronous writes. After ffs_realloccg succeeds, use + block_extended to handle the magic. Get rid of old poke calls. + +Tue Apr 16 15:20:07 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): Set atime appropriately, and sync + the new atime if we are running synchronously (!). + (diskfs_dirempty): Likewise. + (diskfs_direnter_hard): Set mtime appropriately. + (diskfs_dirremove_hard): Likewise. 
+ (diskfs_dirrewrite_hard): Likewise. + + * inode.c (diskfs_write_disknode): Only do sync if WAIT is set. + +Thu Apr 4 16:39:22 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_cached_lookup): Initialize NP->cache_id *after* + NP exists. + +Wed Apr 3 16:03:51 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c (diskfs_cached_lookup): Renamed from `iget'. All + callers changed. Initialize NP->cache_id. + +Fri Mar 29 16:52:31 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * sizes.c (diskfs_truncate): Cast DI->di_shortlink to correct type + before adding a character count to it. + +Mon Mar 25 13:08:10 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_null_dirstat): New function. + +Fri Mar 22 23:43:53 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (read_symlink_hook): Only set NP's atime if !readonly. + +Wed Mar 20 14:36:31 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (diskfs_lookup_hard): Don't do initial or final permission + checking here. + * dir.c (diskfs_dirrewrite_hard): Renamed from diskfs_dirrewrite. + No longer call modification tracking routines. + (diskfs_dirremove_hard): Renamed from diskfs_dirremove. No longer call + modification tracking routines. + (diskfs_direnter_hard): Renamed from diskfs_direnter. No longer call + modification tracking routines. + (diskfs_lookup_hard): Renamed from diskfs_lookup. + +Mon Mar 18 19:50:41 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Pass new arg to argp_parse. + +Mon Mar 18 12:33:06 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * pager.c (diskfs_max_user_pager_prot) [add_pager_max_prot]: + (a == b) ? 1 : 0 ====> (a == b). + +Fri Feb 23 15:27:05 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu> + + * hyper.c (get_hypermetadata): Use diskfs_device_arg in unclean msgs. 
+ +Wed Feb 21 05:57:12 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu> + + * hyper.c: Implement proper handling of the filesystem `clean bit'. + (ufs_clean): New variable. + (get_hypermetadata): Set it from the fs_clean flag. If not clean, + complain and force read-only. Complain when ignoring COMPAT_BSD42. + (diskfs_set_hypermetadata): Set the clean flag in the superblock + when CLEAN and fs was clean to start with. + (copy_sblock): Remove bogus clean flag frobnication. + +Fri Feb 16 17:05:36 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Check error return from diskfs_init_diskfs. + +Sat Jan 6 11:50:14 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * ufs.h (diskpager, diskpagerport, disk_image): Variables removed. + Include <hurd/diskfs-pager.h> instead. + (sync_disk_blocks): Use `disk_pager' in place of `diskpager->p'. + * pager.c (diskfs_shutdown_pager, diskfs_sync_everything): Use + `disk_pager' in place of `diskpager->p'. + (create_disk_pager): Rewritten using disk_pager_setup. + * pokeloc.c (sync_disk): Use `disk_pager' in place of `diskpager->p'. + * sizes.c (indir_release): Likewise. + * main.c (diskfs_reload_global_state): Likewise. + +Thu Jan 4 19:10:11 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * main.c (main): Don't map disk image here; create_disk_pager now + does it. + + * hyper.c (get_hypermetadata, copy_sblock): Don't put + diskfs_catch_exception () inside assert, bonehead! Use + assert_perror on a variable of its result. + +Mon Jan 1 16:38:14 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * pager.c (pager_unlock_page): When allocating block in direct + array, clear it synchronously just like we do when it goes in the + indirect array. + +Thu Nov 9 14:01:30 1995 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * dir.c (struct dirstat): New member `nbytes'. + (dirscanblock): If DS->type is COMPRESS, still look + for TAKE/SHRINK possibilities. 
Also, if it's COMPRESS, + still look to see if the current block can be compressed + with fewer byte copies. + +Sun Nov 5 02:08:38 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Add flags arg to diskfs_startup_diskfs call. + +Sat Nov 4 20:01:58 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_S_file_get_storage_info): Add FLAGS argument. + +Thu Oct 19 12:50:11 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (diskfs_max_user_pager_prot): Return what we discovered, + instead of 1. + + * dir.c (diskfs_lookup, diskfs_dirempty): Give diskfs_get_filemap + a protection arg. + * sizes.c (diskfs_truncate, diskfs_grow): Ditto. + + * hyper.c (diskfs_readonly_changed): Give the 2nd arg to + vm_protect an appropriate type. + + * pager.c (diskfs_max_user_pager_prot): Stop iterating early if poss. + +Wed Oct 18 16:28:42 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * ufs.h (struct user_pager_info): Add max_prot field. + * pager.c (diskfs_get_filemap): Add PROT parameter, & use it. + (diskfs_pager_users): Split out block_caching & enable_caching. + (block_caching, enable_caching): New function. + (diskfs_max_user_pager_prot): New function. + + * main.c (main): Always include VM_PROT_WRITE in max prot. + * hyper.c (diskfs_readonly_changed): Change the protection of + DISK_IMAGE to reflect the new state. Clear SBLOCK_DIRTY if readonly. + + * inode.c (read_disknode): Bother to set the allocsize field. + + * ufs.h (struct rwlock): Structure deleted. + (rwlock_init, rwlock_reader_unlock, rwlock_reader_lock, + rwlock_writer_lock, rwlock_writer_unlock): Functions deleted. + + +Tue Oct 17 14:49:43 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_node_reload): New function. + (iget): Move allocsize setting into read_disknode. + * pager.c (flush_node_pager): New function. + * ufs.h (zeroblock, sblock, csum): Declare extern. + (flush_node_pager, flush_pokes): New declarations. + * pokeloc.c (flush_pokes): New function. 
+ * hyper.c (diskfs_readonly_changed): New function. + (get_hypermetadata): Move compat_mode futzing & disk size + validation here from main. + (zeroblock, sblock, csum): Define (were common). + (get_hypermetadata): Only allocate SBLOCK if not already done. + Deallocate any old ZEROBLOCK and CSUM storage. + (diskfs_readonly_changed): New function. + * main.c (main): Move stuff into get_hypermetadata. + Writable init code moved to diskfs_readonly_changed. + (diskfs_reload_global_state): New function. + +Fri Oct 13 15:03:37 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Use new handy diskfs routines and get rid of + tons of junk. Main should be almost all ufs-specific now. + (USAGE, usage, SHORT_OPTS, long_opts, parse_opt, trans_parse_arg): RIP. + (printf_lock): Initialize. + +Thu Oct 12 18:48:04 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * pager.c (pager_unlock_page, pager_write_page, pager_read_page): + Use diskfs_device_{read,write}_sync instead of dev_{read,write}_sync. + * hyper.c (diskfs_set_hypermetadata): Ditto. + * sizes.c (diskfs_grow): Ditto. + * pager.c (pager_report_extent): Calculate the pager size. + * ufs.h (dev_read_sync, dev_write_sync, dev_write, diskpagersize): + Decls removed. + + * Makefile (SRCS): Remove devio.c. + * ufs.h (ufs_device, ufs_device_name): Variables removed. + * inode.c (diskfs_S_file_get_storage_info): Use DISKFS_DEVICE + instead of UFS_DEVICE, and DISKFS_DEVICE_NAME instead of + UFS_DEVICE_NAME. + +Sat Oct 7 20:47:56 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (diskfs_init_completed): Function deleted (now in libdiskfs). + (thread_cancel): Function deleted. + +Fri Oct 6 17:30:23 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_S_file_get_storage_info): Change type of + ADDRESSES to off_t **, and add the BLOCK_SIZE parameter. + +Wed Oct 4 17:21:33 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * inode.c (diskfs_set_statfs): fsys_stb_bsize -> fsys_stb_iosize. + fsys_stb_fsize -> fsys_stb_bsize. 
+ + * main.c (parse_opt): Rearrange slightly. + +Tue Sep 26 11:54:35 1995 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * inode.c: Include <netinet/in.h>. + (diskfs_S_file_get_storage_info): New function. + * main.c (main): Delete var `devname'. Use `ufs_device_name' + throughout instead. + * ufs.h (ufs_device_name): New var. + +Fri Sep 22 13:22:42 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * hyper.c (get_hypermetadata): Use %Zd format for result of sizeof. + +Tue Sep 19 13:41:46 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (LDFLAGS): New variable. + +Wed Sep 13 12:30:23 1995 Michael I. Bushnell, p/BSG <mib@duality.gnu.ai.mit.edu> + + * dir.c (diskfs_lookup): Don't attempt to lock NP if NPP is not + set. Don't even set NP if NPP is not set; use INUM as "lookup + succeeded flag" instead. Lookups for REMOVE and RENAME now *must* + set NPP. + +Wed Sep 6 11:01:50 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * pager.c (diskfs_pager_users): Ignore the disk pager when seeing + if there are any active pagers. + +Mon Aug 28 17:07:36 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * Makefile (ufs): Depend on ../libshouldbeinlibc/libshouldbeinlibc.a. + +Fri Aug 25 17:14:09 1995 Michael I. Bushnell, p/BSG <mib@duality.gnu.ai.mit.edu> + + * sizes.c (diskfs_truncate): When freeing direct blocks mentioned + in a single indirect block, or single indirect blocks mentioned in + a double, only call the free routine (ffs_blkfree or + indir_release, respectively) if the block is actually allocated. + +Wed Aug 23 12:24:07 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (ufs): Add explicit dependencies. + (HURDLIBS, LDFLAGS, REMHDRS): Removed. + Rules associated with ../lib removed. + +Fri Jul 21 17:48:12 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * pager.c (diskfs_get_filemap): Drop initial reference created by + pager_create. 
+ + * pager.c (diskfs_get_filemap): Avoid race with simultaneous + termination by looping until we win. + (pager_clear_user_data): Only clear UPI->np->dn->fileinfo if it + still points to us. + +Mon Jul 17 14:35:25 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * pager.c (thread_function): Don't have any global timeout here; + we don't use it anyhow. + +Thu Jul 6 15:42:52 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * Makefile: Removed dependencies that are now automatically + generated. + +Mon Jun 26 20:17:42 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * pager.c: Include <unistd.h>. + (diskfs_pager_users): New function. + +Thu Jun 22 11:41:04 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * pager.c (thread_function): Move thread_function to be non-local, + of course, because it needs to live even after create_disk_pager + returns. + + * main.c (thread_cancel): New function (HACK). + + * Makefile (HURDLIBS): Add libihash. + + * main.c (main): Have main thread exit when done instead of + calling a diskfs function. + +Wed Jun 21 12:20:01 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * ufs.h (user_pager_info): Removed members next and prevp. + * pager.c (pager_clear_user_data): Don't maintain pager linked + list. + (diskfs_get_filemap): Don't maintain pager linked list. + (pager_dropweak): New function. + (pager_traverse): Delete function. + (diskfs_shutdown_pager): Use ports_bucket_iterate instead of + pager_traverse. + (diskfs_sync_everything): Likewise. + + * pager.c (pager_bucket): New variable. + (create_disk_pager): Provide pager_bucket in call to pager_create. + (diskfs_get_filemap): Likewise. + (diskfs_file_update): Use ports reference calls directly instead + of pager wrappers. + (drop_pager_softrefs): Likewise. + (allow_pager_softrefs): Likewise. + (pager_traverse): Likewise. + (create_disk_pager): Initialize pager_bucket here and fork off + service thread for pager ports. 
+ + * sizes.c (diskfs_truncate): Likewise. + + * dir.c (diskfs_lookup): Provide initialization for BUFLEN. + (diskfs_direnter): Move assignment out of if test. + +Tue Jun 20 11:48:06 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * sizes.c (diskfs_grow): Provide initialization of POKE_OFF. + * alloc.c (ffs_realloccg): Remove assignment from if tests. + * sizes.c (diskfs_truncate): Likewise. + * bmap.c (fetch_indir_spec): Likewise. + +Mon Jun 19 21:17:21 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * inode.c (diskfs_node_iterate): New function. + (write_all_disknodes): Use it. + +Wed Jun 14 16:18:55 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * inode.c (diskfs_get_translator): Conform to new memory usage + semantic. + +Sat May 20 00:17:30 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * main.c (trans_parse_args): Use options_parse & + diskfs_standard_startup_options to parse our translator options. + (usage): New function. + (parse_opt): New function. + + * Makefile (CPPFLAGS): Add -I../lib, to get include lib include files, + and $(CPPFLAGS-$(notdir $<)) to get file-specific cpp options. + Add a vpath for %.c to ../lib, so we can use source files from there. + +Mon May 15 13:14:48 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * pager.c (pager_clear_user_data): Doc fix. + +Sat May 13 05:04:11 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * Makefile (OBJS): Remove exec_server_image.o. + (exec_server_image.o): Rule removed. + +Mon May 8 08:43:43 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * dir.c (diskfs_lookup): When looping back to try_again: because + we're looking up "..", be sure and trash the mapping we made of + the directory's pager -- otherwise the reference to the pager + never gets dropped and we can never free the node. + + * dir.c (diskfs_lookup): ds->type was being compared to LOOKING, which + value it can never have. Compare ds->stat against LOOKING instead. 
+ + * pager.c (pager_clear_user_data): Don't die when called on the + disk pager. + + * inode.c (write_all_disknodes): Fix typo `alloc' --> `alloca'. + +Tue May 2 11:59:09 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * pager.c (pager_clear_user_data): Acquire pagerlistlock around + modifications to UPI->next/prevp list structure. + +Fri Apr 28 19:02:05 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (write_all_disknodes): We have to really lock the nodes + around the calls to diskfs_set_node_times and write_node; this in + turn forces us to have real references. + +Thu Apr 13 16:36:57 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * main.c (main): Don't abort if a std file descriptor is already open. + +Tue Apr 4 20:08:25 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (diskfs_set_translator): When freeing passive + translator, account for blocks freed in NP->dn_stat.st_blocks. + +Fri Mar 31 13:43:27 1995 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * sizes.c (diskfs_truncate): Don't acquire writer lock on + NP->dn->allocptrlock until after forcing delayed copies through; + otherwise the pageins will deadlock attempting to get a reader + lock to service them. This is safe, because we only need + NP->allocsize here, and that can't change as long as we hold + NP->lock. + +Mon Mar 20 13:58:44 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * consts.c (diskfs_synchronous): New variable. + +Fri Mar 17 14:31:04 1995 Michael I Bushnell <mib@duality.gnu.ai.mit.edu> + + * alloc.c (ffs_clusteracct): Make static. + (alloc_sync): New function. + (ffs_alloc): Call alloc_sync. + (ffs_realloccg): Likewise. + (diskfs_alloc_node): Likewise. + (ffs_blkfree): Likewise. + (diskfs_free_node): Likewise. + +Sat Jan 28 14:59:26 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * Makefile (OBJS): Remove reference to libc's devstream.o.
+ +Fri Nov 11 11:45:38 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * hyper.c (diskfs_set_hypermetadata): Always use dev_write_sync to + avoid device_write bug that says you can't modify the buffer until + device_write returns. Also remember to deallocate BUF. + +Thu Nov 10 13:27:09 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * main.c (main): Issue decent prompt. + + * hyper.c (diskfs_set_hypermetadata): Copy CSUM into a + page-aligned page-sized buffer for disk write to avoid inane + kernel bug. + +Wed Nov 9 05:43:14 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * main.c (main): Behave more reasonably if we can't open DEVNAME. + +Tue Nov 8 00:03:20 1994 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * pager.c (pager_write_page): Use %p for printing PAGER. + + * ufs.h: Declare copy_sblock. + +Wed Nov 2 16:06:10 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * hyper.c (copy_sblock): Don't copy csum here. + (diskfs_set_hypermetadata): Write csum directly to disk here. + +Thu Oct 27 20:58:08 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * dir.c (diskfs_lookup): diskfs_get_filemap returns a send right, + so don't create an additional one here. + (diskfs_dirempty): Likewise. + * sizes.c (diskfs_truncate): Likewise. + (diskfs_grow): Likewise. + +Tue Oct 25 12:49:41 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * hyper.c (copy_sblock): Call record_poke for csum and superblock + after modifying them. + + * pager.c (diskfs_shutdown_pager): Call copy_sblock. + (diskfs_sync_everything): Likewise. + + * alloc.c (ffs_fragextend): Call record_poke for CG after + modifying it. Also set CSUM_DIRTY and SBLOCK_DIRTY. + (ffs_alloccg): Likewise. + (ffs_alloccgblk): Likewise. + (ffs_nodealloccg): Likewise. + (ffs_blkfree): Likewise. + (diskfs_free_node): Likewise. + +Fri Oct 7 01:32:56 1994 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * main.c (diskfs_init_completed): Don't call _hurd_proc_init. 
+ (saved_argv): Variable removed. + (main): Don't set saved_argv. Pass ARGV to diskfs_start_bootstrap. + +Wed Oct 5 22:18:46 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * inode.c (read_disknode): If we are the bootstrap filesystem, + then getpid changes once proc starts up. So only call getpid + once, thus not allowing st_dev values to mysteriously change. + +Wed Oct 5 12:56:53 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * alloc.c (diskfs_alloc_node): Abort if free inode has + translator attached. + +Tue Oct 4 18:33:35 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * pager.c (pager_unlock_page): Call diskfs_catch_exception. + +Tue Oct 4 00:16:04 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (diskfs_lost_hardrefs): Comment out body. + * ufs.h (node2pagelock): New variable. + * pager.c (node2pagelock): Initialize. + (diskfs_get_filemap): Don't let node hold a reference to the pager. + (pager_clear_user_data): Acquire node2pagelock and clear + the node's reference to the pager. + (diskfs_file_update): Hold node2pagelock for reference + of NP->dn->fileinfo. + (drop_pager_softrefs): Likewise. + (allow_pager_softrefs): Likewise. + (diskfs_get_filemap): Likewise. + * sizes.c (diskfs_truncate): Likewise. + + * Makefile (SRCS): Added pokeloc.c. + +Mon Oct 3 15:03:38 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * sizes.c (diskfs_truncate): Rewritten. + + * bmap.c (fetch_indir_spec): Initialize OFFSET values to -2, + meaning that the entry is not needed. If LBN is negative, + then don't set values for the data block. + + * inode.c (write_node): Call record_poke after writing + dinode. + (create_symlink_hook): Likewise. + (diskfs_set_translator): Likewise. + * pager.c (pager_unlock_page): Likewise. + * sizes.c (diskfs_truncate): Likewise. + * pager.c (pager_unlock_page): Call record_poke after writing + indirect block. + * sizes.c (diskfs_grow): Likewise. + (diskfs_grow): Likewise. 
+ * pager.c (diskfs_sync_everything) [sync_one]: If this is the + disk pager, call sync_disk instead. + * pokeloc.c: New file. + +Fri Sep 30 11:25:36 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * dir.h: Delete DT_* definitions; they are now in <dirent.h>. + * dir.c (diskfs_get_directs): Set USERP->d_type as DT_UNKNOWN. + When the bugs in the type fields are fixed (dealing with + multiple links and mode changes) then this can actually return + the value. + +Thu Sep 29 17:16:58 1994 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * main.c (main): Test getpid()>0 to decide we are a normal + translator instead of the boot fs. Fetch bootstrap port after + possibly calling diskfs_parse_bootargs, not before. + +Tue Sep 27 15:24:58 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * sizes.c (diskfs_grow) [computation of newallocsize]: Last block + number is one less than the total number of blocks. + +Tue Sep 27 11:58:44 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * bmap.c (fetch_indir_spec): Single indirect block pointer is + in the INDIR_SINGLE slot, not the INDIR_DOUBLE slot. + +Mon Sep 26 20:47:30 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * Makefile (SRCS): Added bmap.c. + + * main.c (main): Don't call pager_init. + + * inode.c (diskfs_get_translator): Repair to read translator + correctly. + + * sizes.c (diskfs_grow): Compute block numbers in a more clean + (and confidently correct) fashion. + (diskfs_truncate): Set NP->allocsize from a properly rounded + value. + +Mon Sep 26 12:50:38 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * inode.c (diskfs_lost_hardrefs): "Know" that a pager starts + with a portinfo; we don't actually have access to the pager + struct here. + +Fri Sep 23 14:21:55 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + [ Continuing yesterday's changes. ] + * ufs.h (struct dirty_indir): New type. + (struct disknode): New member `dirty'. + * inode.c (iget): Initialize DN->dirty. 
+ * bmap.c (mark_indir_dirty): New function. + * pager.c (pager_unlock_page): Call mark_indir_dirty before + writing into indirect blocks. + (diskfs_file_update): Sync indirect blocks here. + (pager_traverse): Simplify; do FILE_DATA and diskpager. + (pager_init): Removed function. + (create_disk_pager): New function. + * sizes.c: Completely rewritten. + * main.c (main): Spawn first thread sooner so we can + map and look at the disk image. + * hyper.c (get_hypermetadata): Moved firewall asserts + here from pager_init. + +Thu Sep 22 11:28:46 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + [This long series of changes deletes the DINODE, CG, SINDIR, + and DINDIR pagers and adds a new pager type DISK.] + * ufs.h (struct disknode) Removed DINLOCK, SINLOCK, and + SININFO members. New member ALLOCPTRLOCK renamed from DATALOCK. + Removed SINLOC, DINLOC, SINLOCLEN, and DINLOCLEN. + (struct user_pager_info) [enum pager_type]: Removed types + DINODE, CG, SINDIR and DINDIR; added type DISK. + (dinpager, dinodepager, cgpager): Deleted vars. + (diskpager): New var. + (dinmaplock, sinmaplock, pagernplock): Deleted vars. + (sblock_dirty, csum_dirty, cgs, dinodes): Deleted vars. + (fsaddr): New macro. + (dino, indir_block, cg_locate): New inline functions. + (sync_disk_blocks, sync_dinode): New inline functions. + (struct iblock_spec): New type. + * pager.c (dinport, dinodeport, cgport, sinlist): Deleted vars. + (filepagerlist): Renamed from filelist. + (pagernplock): Deleted variable. + (find_address): Removed switch; support only DISK and FILE_DATA. + (pager_report_extent): Likewise. + (pager_unlock_page): Removed switch. Return without comment for + DISK; allocate indirect blocks as necessary right here for + FILE_DATA. + (sin_map, sin_remap, sin_unmap, din_map, din_unmap): Deleted + functions. + (indir_alloc, sync_dinode): Deleted functions. + (enqueue_pager, dequeue_pager): Deleted functions. 
+ (diskfs_file_update): No longer lock pagernplock; nothing + to do with sininfo. + (drop_pager_softrefs): Likewise. + (allow_pager_softrefs): Likewise. + (diskfs_get_filemap): Put pager on filepagerlist right here + instead of through pager_enqueue. + (pager_clear_user_data): Likewise, mutatis mutandis. + * main.c (main): Call create_disk_pager and then map the + entire disk into disk_image. + * hyper.c (get_hypermetadata): Use bcopy instead of dev_read_sync. + (diskfs_set_hypermetadata): NOP out function. + (copy_sblock): New function, substance of code is from old + diskfs_set_hypermetadata. + * inode.c (iget): Don't initialize deleted disknode fields. + (diskfs_node_norefs): Don't verify that deleted disknode + fields are not set. + (read_disknode): Get dinode from DINO, not DINODES array. + (write_node): Likewise. + (create_symlink_hook): Likewise. + (read_symlink_hook): Likewise. + (diskfs_set_translator): Likewise. + (diskfs_get_translator): Likewise. + (diskfs_node_translated): Likewise. + * alloc.c (ffs_realloccg): Likewise. + (ffs_fragextend): Use cg_locate instead of cgs array. + (ffs_alloccg): Likewise. + (ffs_nodealloccg): Likewise. + (ffs_blkfree): Likewise. + (diskfs_free_node): Likewise. + * inode.c (diskfs_set_translator): Use bcopy and sync_disk_blocks + instead of dev_write_sync. + (diskfs_get_translator): Likewise, mutatis mutandis. + (read_disknode): Initialize NP->istranslated. + (diskfs_set_translator): Set/clear NP->istranslated as appropriate. + (diskfs_node_translated): Removed function. + * bmap.c: New file. + + [This improves the RWLOCK mechanism and makes it more + orthogonal. It should probably be moved into a library.] + * ufs.h (struct rwlock): Added MASTER and WAKEUP members. + (struct disknode): Removed RWLOCK_MASTER and RWLOCK_WAKEUP + fields. + (rwlock_reader_lock): Omitted arg DN; use new MASTER and WAKEUP + members inside LOCK instead. + (rwlock_writer_lock): Likewise. + (rwlock_reader_unlock): Likewise.
+ (rwlock_init): Initialize new MASTER and WAKEUP fields. + * inode.c (iget): Don't deal with RWLOCK_MASTER and RWLOCK_WAKEUP. + * pager.c (find_address): Deleted arg DNP. Only pass one + arg to rwlock functions. + (pager_read_page): Deleted var DN; only pass one arg to rwlock + functions. + (pager_write_page): Likewise. + +Wed Sep 21 00:26:25 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * pager.c (allow_pager_softrefs): Unlock PAGERNPLOCK when + we're done with it. + (sin_map): Hold PAGERNPLOCK all the way until we're done + with the sininfo pointer. + (pagernplock): No longer static. + * ufs.h (pagernplock): Declare here. + + * sizes.c (diskfs_grow): Don't call diskfs_file_update here. + This was done to prevent too much dirty data from accumulating + and then overwhelming the pager later. But that's really the + pager's responsibility. + + * ufs.h (struct disknode): New members `dinloclen' and `sinloclen'. + * inode.c (iget): Initialize DN->dinloclen and DN->sinloclen. + (diskfs_node_norefs): Verify that DN->dinloclen and DN->sinloclen + are both zero. + * pager.c (find_address) [SINDIR]: Verify that reference is + within bounds of NP->dn->dinloc. + (pager_unlock_page) [SINDIR]: Likewise. + (din_map): Set NP->dn->dinloclen. + (din_unmap): Clear NP->dn->dinloclen. + (find_address) [FILE_DATA]: Verify that reference is within + bounds of NP->dn->sinloc. + (pager_unlock_page) [FILE_DATA]: Likewise. + (sin_map): Set NP->dn->sinloclen. + (sin_remap): Reset NP->dn->sinloclen. + (sin_unmap): Clean NP->dn->sinloclen. + + * pager.c (pager_write_page): Flush stdout after printf. + (pager_unlock_page) [FILE_DATA]: Likewise. + + * sizes.c (diskfs_truncate): In all references to sinloc and + dinloc arrays, verify that references are within allocated bounds. + (diskfs_grow): Likewise. + (sindir_drop): Likewise. + + * pager.c: Create new mapping with extent NEWSIZE, not SIZE (which + was the old size of the mapping).
+ +Tue Sep 20 15:51:35 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * pager.c (pager_report_extent) [SINDIR]: Remove erroneous extra + division by block size. + (sin_remap): Likewise. + +Mon Sep 19 17:34:11 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * inode.c (create_symlink_hook): Write assert test correctly. + + * dir.c (diskfs_direnter) [EXTEND]: Reference file size only + *once*; don't rely on the behavior of diskfs_grow vis a vis + file size. + +Fri Sep 16 10:29:42 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * dir.c (dirscanblock): Compute offset correctly for mangled + entry notice. + + * dir.c (diskfs_direnter) [EXTEND]: Reference file size only + once before calling diskfs_grow in case diskfs_grow actually + increases the size. + + * inode.c (diskfs_set_statfs): Set fsid from getpid. + (read_disknode): Likewise. + + * dir.h (struct directory_entry): Renamed from struct direct. + * dir.c: All uses of struct direct changed to use + struct directory_entry. + (diskfs_get_directs): New var `userp'. Copy from *ENTRYP into + it (set at DATAP) more cleanly. + +Mon Sep 12 11:30:48 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * hyper.c (diskfs_set_hypermetadata): Don't frob clean and dirty + bits if we are readonly. + +Sat Sep 10 11:41:06 1994 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * main.c (main): When started up as a passive translator, + open fds 0, 1, and 2 on /dev/console for debugging messages. + Call diskfs_init_diskfs with no args; after warp_root, call + diskfs_startup_diskfs on BOOTSTRAP. Compare BOOTSTRAP to + MACH_PORT_NULL instead of zero. + +Fri Sep 9 13:02:33 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * main.c (trans_parse_args): Fix and enable. + +Tue Sep 6 11:29:55 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * inode.c (iget): Remove old assert test that checked for bad + inode block allocations.
+ +Thu Sep 1 11:39:12 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * tables.c: Don't include "ufs.h"; include <sys/types.h>. Then + this file can be used unmodified by fsck. + +Tue Aug 30 13:36:37 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (diskfs_set_translator): ffs_blkfree doesn't have + a return value. + +Mon Aug 29 12:49:17 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (diskfs_set_translator): If NAMELEN is zero, then + make the node have no translator. + +Fri Aug 26 12:28:20 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * inode.c (read_disknode): 4.4 fsck sometimes sets the author + field to -1 to mean "ignore old uid location"; take that to mean + "author == uid". + (diskfs_set_translator): If we are allocating a new block for + the translator, then account for it in st_blocks. + +Thu Aug 18 12:41:12 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Use short version. + + * alloc.c (diskfs_alloc_node): Bother to set *NPP before + returning. + +Tue Aug 16 10:48:04 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * Makefile (LDFLAGS): New variable. + +Fri Aug 5 15:51:09 1994 Michael I Bushnell <mib@churchy.gnu.ai.mit.edu> + + * dir.c (diskfs_direnter) [EXTEND]: Crash if the entry won't + fit in the new block. + (diskfs_lookup): Return ENAMETOOLONG if the name is bigger than + MAXNAMLEN. + + * dir.c (diskfs_get_directs): Set USERD->d_reclen correctly. + +Fri Jul 22 15:12:35 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * Makefile: Rewritten in accord with new scheme. + +Wed Jul 20 13:28:38 1994 Michael I Bushnell <mib@geech.gnu.ai.mit.edu> + + * main.c (main): Don't set diskfs_dotdot_file. + +Tue Jul 19 21:51:54 1994 Roland McGrath (roland@churchy.gnu.ai.mit.edu) + + * ufs.h: Removed defns of u_quad_t, quad_t; now in <sys/types.h>. + Removed defn of struct timespec; now in <sys/time.h>. 
+ +Tue Jul 19 12:47:31 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * main.c (main): Deleted var `diskfs_dotdot_file'. + (trans_parse_args): Don't set diskfs_dotdot_file; don't expect + dotdot from fsys_getroot. + + * Makefile (LDFLAGS): Moved to rule for `ufs' and commented out. + (ufs): Don't use variable $(link) anymore. + +Mon Jul 18 14:55:17 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dir.c (diskfs_get_directs): Return data to user in old format. + Add new code for new format, maintaining compatibility correctly, + but comment it out until the library is ready. + + * hyper.c (diskfs_set_hypermetadata): If we presumed to + set new values of fs_maxfilesize, fs_qbmask, and fs_qfmask, + then restore the originals before writing out the superblock. + + * pager.c (diskfs_get_filemap): Test should be S_ISLNK, not + S_ISSOCK. + + * hyper.c (get_hypermetadata): Set new constants in filesystems + which don't have them yet. + (get_hypermetadata): Cast MAXSYMLINKLEN to long to avoid + converting sblock->fs_maxsymlinklen into an unsigned. + + * subr.c (scanc, skipc): New functions. + (ffs_setblock): Use assert instead of panic. + + * inode.c (read_disknode): Set old stat structure until the header + file gets changed. + +Fri Jul 15 12:07:15 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * sizes.c: Include <string.h> for bzero. + * fs.h (blksize): Comment out dblksize macro. In blksize + macro, use NP->allocsize instead of IP->i_size. + + * dinode.h (INDIR_SINGLE, INDIR_DOUBLE, INDIR_TRIPLE): New macros. + + * inode.c (read_disknode, write_node): Use new stat and dinode + fields for times. + + * ufs.h: Change `nextgennumber' to be `u_long' instead of int. + Change prototypes of some alloc.c functions. + * alloc.c (ffs_alloc): Declare to return error_t. + (ffs_realloccg): Likewise. 
+ (ffs_hashalloc, ffs_alloccg, ffs_fragextend, ffs_alloccg, + ffs_dirpref, ffs_nodealloccg, ffs_alloccgblk, ffs_mapsearch, + ffs_clusteracct): Provide forward declarations. + (ffs_realloccg): Use printf instead of log. + Make BPREF volatile for setjmp safety. + (diskfs_alloc_node): Use diskfs global variable instead of TIME. + (ffs_nodealloccg): Likewise. + (ffs_blkfree): Likewise. + (diskfs_free_node): Likewise. + (ffs_blkfree, ffs_clusteracct): Declare as void. + (ffs_alloccg, ffs_nodealloccg): Declare as u_long. + + * ufs.h: Change prototypes of some subr.c functions. + * subr.c (ffs_isblock): Use assert instead of panic. + (ffs_clrblock): Likewise. + + * hyper.c: Include "dinode.h". + + * dinode.h (LINK_MAX): New macro, from BSD sys/sys/syslimits.h. + * fs.h (MAXBSIZE, MAXFRAG): New macros, from BSD sys/sys/param.h. + + * hyper.c (get_hypermetadata): Provide first arg in call to + fsbtodb. + (diskfs_set_hypermetadata): Likewise. + * inode.c (diskfs_set_translator): Likewise. + (diskfs_get_translator): Likewise. + * pager.c (find_address): Likewise. + (indir_alloc): Likewise. + * inode.c (iget): Provide first arg in call to lblkno. + * sizes.c (diskfs_truncate): Likewise. + * pager.c (find_address): Likewise. + * sizes.c (diskfs_grow): Likewise. + * inode.c (iget): Provide first arg in call to fragroundup. + * sizes.c (diskfs_truncate): Likewise. + * sizes.c (diskfs_grow): Likewise. + * inode.c (iget): Provide first arg in call to blkroundup. + * pager.c (pager_unlock_page): Likewise. + * sizes.c (diskfs_truncate): Likewise. + * sizes.c (diskfs_grow): Likewise. + * pager.c (find_address): Provide first arg in call to cgtod. + * pager.c (find_address): Provide first arg in call to cgimin. + * pager.c (find_address): Provide first arg in call to blktofrags. + * pager.c (find_address): Provide first arg in call to blkoff. + * sizes.c (diskfs_truncate): Likewise. + * sizes.c (diskfs_grow): Likewise. + * sizes.c (diskfs_truncate): Provide first arg in call to blksize.
+ * sizes.c (diskfs_grow): Likewise. + * sizes.c (diskfs_truncate): Provide first arg in call to numfrags. + + * ufs.h: Added temporary declarations of `u_quad_t', `quad_t', and + `struct timespec'. + + * pager.c (diskfs_get_filemap): Make sure that this is + a kind of node that can be validly read. + + * inode.c (create_symlink_hook): Renamed from symlink_hook. + (read_symlink_hook): New function. + (diskfs_read_symlink_hook): Initialize. + +Thu Jul 14 12:23:45 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * alloc.c: New from 4.4 BSD; BSD version `8.8 2/21/94'. + Remove old includes; include "ufs.h", "fs.h", "dinode.h", + and <stdio.h>. Replace panics with asserts and comment out + uprintfs. Use prototypes throughout. Replace calls + to ffs_fserr with printf. + (alloclock): New variable. + (ffs_alloc): Variable struct inode *IP is now struct node *NP; + refer to it appropriately. Initialize FS to sblock. + Lock alloclock around actual allocation steps. Reverse order + of BNP and CRED arguments; declare CRED as a protid and use + accordingly. Permit CRED to be null. + (ffs_realloccg): Variable struct inode *IP is now struct node *NP; + refer to it accordingly. Comment out U*x buffer management code. + Lock alloclock around actual allocation steps. Initialize FS + from sblock. Declare CRED as a protid and use it accordingly. + Change BUF arg to PBN (physical block number); return new block + there. + (ffs_reallocblks): Comment out. + (diskfs_alloc_node): Renamed from ialloc. Initialize FS from + sblock. Use calling sequence from <hurd/diskfs.h>. Acquire + alloclock around actual allocation steps. Deleted vars + `pip', `pvp' (use dir instead). Use iget instead of VFS_VGET. + Var struct inode *IP now struct node *NP. Lock gennumberlock + around frobbing of nextgennumber. + (ffs_blkpref): Arg struct inode *ip is now struct node *np; + refer to it accordingly. Initialize FS to sblock. Lock + alloclock during actual work. Use csum instead of fs_cs macro.
+ (ffs_hashalloc): Arg struct inode *IP is now struct node *NP; + use it accordingly. Initialize FS from sblock. + (ffs_fragextend): Arg struct inode *IP is now struct node *NP; + use it accordingly. Initialize FS from sblock. Initialize + CGP from cgs array; don't use bread. Comment out calls to brelse + and bdwrite. Set CGP->time from diskfs global var. Use csum + instead of fs_cs macro. + (ffs_alloccg): Arg struct inode *IP is now struct node *NP. + Initialize FS from sblock. Initialize CGP from cgs array; + don't use bread. Comment out calls to brelse and bdwrite. + Set CGP->time from diskfs global var. Use csum instead of + fs_cs macro. + (ffs_nodealloccg): Arg struct inode *IP is now struct node *NP. + Initialize FS from sblock. Initialize CGP from cgs array; + don't use bread. Comment out calls to brelse and bdwrite. Use + csum instead of fs_cs macro. + (ffs_blkfree): Arg struct inode *IP is now struct node *NP. + Initialize FS from sblock. Initialize CGP from cgs array; + don't use bread. Comment out calls to brelse and bdwrite. Use + csum instead of fs_cs macro. + (diskfs_free_node): Renamed from ffs_vfree. Use calling + sequence from <hurd/diskfs.h>. Initialize FS from sblock. + Deleted vars pip,pvp (use NP instead). Initialize CGP from + cgs array; don't use bread. Comment out calls to brelse and + bdwrite. Use csum instead of fs_cs macro. + (ffs_fserr): Commented out. + (ffs_dirpref): Use csum instead of fs_cs macro. + + * ufs.h (ffs_alloc): Renamed from alloc; all callers changed. + (ffs_blkfree): New arg NP; renamed from blkfree; all callers changed. + (ffs_blkpref): Renamed from blkpref; all callers changed. + (ffs_realloccg): Renamed from realloccg; all callers changed. + + * fs.h: New from 4.4 BSD; BSD version `8.7 4/19/94'. + (fs_cs): Don't use fs_csp; use global csum instead. + + * subr.c: New from 4.4 BSD; BSD version `8.2 9/21/93'. + Remove old includes. Include "ufs.h" and "fs.h". + (ffs_blkatoff, ffs_checkoverlap): Comment out.
+ + * tables.c: New from 4.4 BSD; BSD version `8.1 6/11/93'. + Don't include <param.h>; do include "ufs.h" and "fs.h". + + * dinode.h: New from 4.4 BSD; BSD version `8.3 1/21/94'. + Remove oldids/inum union; replace with author. + Renamed di_mode to be di_model; allocated di_modeh from spare. + Allocate di_trans from spare. + (di_inumber): Remove macro. + * inode.c (read_disknode): Fetch uid and gid from new (long) + fields in dinode unless we are the old inode format, in which + case fetch them from the old fields. + (write_node): Only set new uid and gid fields if we are not + COMPAT_BSD4. Set old fields if the superblock says to. + (symlink_hook): New function. + (diskfs_create_symlink_hook): Initialize. + * sizes.c (diskfs_truncate): Deal with truncation of short + symlink properly. + + * dir.h: New from 4.4 BSD; BSD version `8.2 1/21/94'. + Substitute our version of DIRSIZ which uses the namelen. + Comment out declarations of struct dirtemplate and struct + odirtemplate. + (DIRECT_TYPE, DIRECT_NAMLEN): New macros. + * ufs.h (direct_symlink_extension): New variable. + * hyper.c (get_hypermetadata): Set direct_symlink_extension. + * dir.c (dirscanblock): Use DIRECT_NAMLEN instead of d_namlen. + (diskfs_direnter): Likewise. + (diskfs_dirempty): Likewise. + (diskfs_get_directs): Likewise. + (diskfs_direnter): Set d_type field of new slot if + direct_symlink_extension is set. + (diskfs_dirrewrite): Likewise. + + * ufs.h (compat_mode): New variable. + * main.c (main): Set compat_mode to zero if we are the bootstrap + filesystem. + * inode.c (diskfs_set_translator): Return error if compat_mode + is set. + (write_node): Don't set GNU dinode field extensions unless + compat_mode is COMPAT_GNU. 
+ +Mon Jul 11 18:14:26 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dir.c (diskfs_get_directs): When copying entries into DATAP, + set the d_reclen parameter of the copy to the minimum length + (because that's all we use) rather than the size that it had + in the directory itself. + +Wed Jul 6 14:41:48 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dir.c (dirscanblock): In main loop, initialize PREVOFF + to zero, not BLOCKADDR. Otherwise, the wrong value is + stored into DS->prevoff and then diskfs_dirremove crashes. + +Tue Jul 5 14:07:38 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dinode.h: Include <endian.h> before test of BYTE_ORDER. + + * Makefile (TAGSLIBS): New variable. + +Tue Jun 21 13:45:04 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dir.c (diskfs_direnter): Update dirents of DP, not NP. + +Mon Jun 20 16:43:48 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * dir.c (diskfs_direnter) [case SHRINK]: NEW should be set to + OLDNEEDED past DS->entry, not to the start of the next entry. + + * dir.c (diskfs_direnter) [case EXTEND]: Cast in assignment + to NEW needs proper scope. + + * inode.c (diskfs_node_norefs): Free dirents list of structure + being deallocated. Also add assert checks to make sure other + state is already clean. + +Thu Jun 16 11:38:17 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * dir.c (diskfs_dirempty): Map directory contents ourselves + instead of using diskfs_node_rdwr. + (struct dirstat): New structure to cache mapping between + lookup and commit operation and avoid use of diskfs_node_rdwr. + (diskfs_lookup): Map directory ourselves. Keep mapping in + DS if DS is nonzero and we might use it in direnter, dirremove, + or dirrewrite. Deallocate mapped buffer if we return some + error (other than ENOENT), or if DS is zero, or if there is + no possible commit operation to follow. When setting DS->stat + to EXTEND, do it the new way. 
+ (dirscanblock): Changed BLKOFF to be virtual address of mapped + block and renamed it BLKADDR. New arg IDX. Use mapped block + instead of calling diskfs_node_rdwr. Set DS according to the new + rules. + (diskfs_direnter): Interpret new dirstat format. + (diskfs_dirremove): Likewise. + (diskfs_dirrewrite): Likewise. + (diskfs_drop_dirstat): Deallocate cached mapping here. + + * dir.c (dirscanblock): When we find the node for type CREATE, + invalidate DS by setting type to LOOKUP, not LOOKING. + + * dir.c (diskfs_direnter, diskfs_dirremove, diskfs_dirrewrite): + Call diskfs_notice_dirchange when appropriate. + + * dir.c (diskfs_get_directs): Deal properly with case where + BUFSIZ==0 and where NENTRIES==-1. + +Wed Jun 15 16:40:12 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * main.c (main): Check device sector size and media size + on startup. + +Tue Jun 14 14:41:17 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * ufs.h (struct disknode) [dirents]: New member. + * inode.c (iget): Initialize DN->dirents. + * dir.c (diskfs_direnter, diskfs_dirremove): Keep track + of dirents member. + (dirscanblock): New var `nentries'; use it to count the + number of directory entries in this block and set it if + we end up scanning the entire block. + (count_dirents): New function. + (diskfs_get_directs): New function. + +Mon Jun 13 13:50:00 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * ufs.h (sinmaplock, dinmaplock): New global vars. + * inode.c (inode_init): Initialize sinmaplock and dinmaplock. + * pager.c (find_address, pager_unlock_page): Protect use + if dinloc array with dinmaplock. + (din_map, din_unmap): Doc fix. + (find_address, pager_unlock_page): Protect use of sinloc array + with sinmaplock. + (sin_map, sin_remap, sin_unmap): Doc fix. + (pager_clear_user_data): Acquire sinmaplock and dinmaplock + instead of NP->dn->datalock and NP->dn->sinlock respectively. 
+ + * sizes.c (diskfs_truncate, diskfs_grow): Protect use of sinloc + and sindir mapping functions with sinmaplock. + (sindir_drop): Protect use of dinloc and dindir mapping functions + with dinmaplock. + + * ufs.h (struct rwlock): New type. + (struct disknode) [dinlock, sinlock, datalock]: Use read-write lock. + Change comments so that these don't lock dinloc and sinloc anymore. + [rwlock_master, rwlock_wakeup]: New members. + (rwlock_reader_lock, rwlock_writer_lock, rwlock_reader_unlock, + rwlock_writer_unlock, rwlock_init): New functions. + * inode.c (iget): Initialize DN->rwlock_master and + DN->rwlock_wakeup. Change initialization of DN->dinlock, + DN->sinlock, and DN->datalock to use rwlock_init. + * pager.c (find_address): Lock NP->dn->dinlock, NP->dn->sinlock, + and NP->dn->datalock with rwlock_reader_lock. Change type of + parameter NPLOCK to be a read-write lock. New parm DNP. Callers + changed. + (pager_read_page, pager_write_page): Change type of NPLOCK to be + read-write lock; call rwlock_reader_unlock instead of + mutex_unlock. New variable DN. + (pager_unlock_page): Use rwlock_writer_lock to lock + NP->dn->dinlock, NP->dn->sinlock, and NP->dn->datalock. + * sizes.c (diskfs_truncate, diskfs_grow): Change locks of DATALOCK + field to use rwlock_writer_{un,}lock. + (sindir_drop): Ditto for SINLOCK field. + (dindir_drop): Ditto for DINLOCK field. + +Mon Jun 6 19:23:26 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * sizes.c (diskfs_grow): After realloccg, zero new data (which I'm + not sure is really necessary, but until I figure it out, this is + safest). Also poke old data (the latter only if the block has + moved)--otherwise the kernel won't know to page it out to the new + location. + (poke_pages): When poking, be careful not to actually change the data. + LEN should be end - start, not start - end. 
+ +Fri Jun 3 12:37:27 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * inode.c (iget): When we find the node in the table, acquire the + mutex *after* incrementing NP->references and unlocking + diskfs_node_refcnt_lock; otherwise we can deadlock against + diskfs_nput. + +Thu Jun 2 12:16:09 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * ufs.h (sblock_dirty, csum_dirty, alloclock): New global variables. + * alloc.c (alloclock): Remove static keyword.. + * alloc.c (realloccg): Set sblock_dirty after changing sblock. + (blkpref): Likewise. + (fragextend): Likewise. + (alloccg): Likewise. + (alloccgblk): Likewise. + (ialloccg): Likewise. + (blkfree): Likewise. + (diskfs_free_node): Likewise. + * hyper.c (diskfs_set_hypermetadata): Likewise. + * alloc.c (fragextend): Set csum_dirty after changi csum. + (alloccg): Likewise. + (alloccgblk): Likewise. + (ialloccg): Likewise. + (blkfree): Likewise. + (diskfs_free_node): Likewise. + * hyper.c (diskfs_set_hypermetadata): Acquire alloclock while + writing hypermetadata. Only write csum and sblock if + csum_dirty or sblock_dirty, respectively, is set, and then + clear it after starting the write. + + * main.c (main): Likewise. + + * sizes.c (diskfs_truncate): Don't turn off caching; the new + light reference system takes care of this. + * pager.c (diskfs_get_filemap): No longer necessary to turn + on caching here, because truncate no longer turns it off. + + * inode.c (diskfs_lost_hardrefs, diskfs_new_hardrefs): New functions. + * pager.c (drop_pager_softrefs, allow_pager_softrefs): New functions. + (sin_map): Use diskfs_nref_light, not diskfs_nref. + (diskfs_get_filemap): Use diskfs_nref_light, not diskfs_nref. + (pager_clear_user_data): Use diskfs_nrele_light, not diskfs_nrele. + * ufs.h (drop_pager_softrefs, allow_pager_softrefs): New + declarations. 
+ +Wed Jun 1 13:35:11 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * sizes.c (diskfs_truncate): After calling sin_unmap, turn + off caching on the sininfo pager so that it gets freed promptly + (there's generally no value in keeping it around because there + is no live fileinfo pager). + * pager.c (diskfs_get_filemap): Make sure we turn caching back on + here, however, if we start using the file pager. + + * pager.c (sin_map): When np->dn->sininfo is set, we have + to insert a valid send right after fetching the receive name. + + * pager.c (sin_unmap, din_unmap): New functions. + (pager_clear_user_data): Call sin_unmap and din_unmap + instead of doing it right here. + + * sizes.c (diskfs_truncate): Call sin_unmap instead of + doing it right here. + (sindir_drop): Call din_unmap instead of doing it right + here. Also, call it always, not just when wo do dindir_drop. + + * sizes.c (diskfs_grow): After alloc into sindir area, + unmap it if we don't have an active data pager. + * ufs.h (sin_unmap, din_unmap): New declarations. + + * sizes.c (diskfs_grow): In computing OSIZE in the realloc + case of lbn < NDADDR, deal correctly with the case where + np->allocsize is already an integral number of blocks. + + * sizes.c (diskfs_grow): Compute SIZE correctly. + + * alloc.c (alloc, realloccg, blkfree): When checking validity + of size arguments, also make sure the size isn't zero. + + * alloc.c (diskfs_alloc_node): Lock ALLOCLOCK before checking + sblock->fs_cstotal.cs_nifree. + +Tue May 31 18:47:42 1994 Roland McGrath (roland@geech.gnu.ai.mit.edu) + + * ufs.h (DONT_CACHE_MEMORY_OBJECTS): Define it. + + * dir.c (diskfs_direnter: case TAKE): Assert that OLD->d_reclen >= + NEEDED, not that it is strictly >. 
+ +Tue May 31 11:10:28 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * sizes.c (diskfs_grow): Call diskfs_node_update (but don't wait) + after successful completion to prevent old data from hanging around + too long and getting flushed all at once from the kernel. + + * sizes.c (diskfs_grow): Change SIZE to be the size of the last + block allocated. Delete variable NSIZE; use SIZE instead. + +Fri May 27 13:15:26 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * sizes.c (diskfs_truncate): Set NP->dn_stat_dirty after each + modification of NP->dn_stat. + + * sizes.c (diskfs_truncate): Compute new value of NP->allocsize + correctly. + + * inode.c (iget): Set NP->allocsize to be the *actual* allocsize. + +Thu May 26 11:51:45 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * sizes.c (diskfs_truncate): In blkfree loop of blocks past + NDADDR, subtract NDADDR from idx to index correctly into + sinloc array. Start this loop with idx not less than NDADDR. + (diskfs_truncate): If olastblock == NDADDR, then we also + need to truncate blocks (one) mapped by single indirect blocks. + (diskfs_truncate): New variable `first2free'. Use in place + of older losing calculations involving lastblock. + (sindir_drop): Rename parameters to be FIRST and LAST. Change + interpretation of FIRST by one to correspond with changed call + in diskfs_truncate. + + * pager.c (sin_remap): When computing NEWSIZE, round up to + a page boundary, thus mimicing the SINDIR computation in + pager_report_extent properly. + + * pager.c (pager_unlock_page) [case SINDIR; vblkno == 0]: Read + from ....di_ib[INDIR_SINGLE] rather than invalid data before + NP->dn->dinloc. + + * alloc.c (alloc) [nospace]: Unlock alloclock. + (realloccg): Unlock alloclock before jumping to nospace. + (blkpref) [!(lbn < NDADDR)]: Unlock alloclock before returning + success. 
+ + * sizes.c (diskfs_grow): When allocing a block past NDADDR, the + tbl arg to blkpref is the table of direct block pointers + NP->dn->sinloc, not the table of indirect block pointers + ...->di_ib. + + * sizes.c (diskfs_grow): When writing into the SINDIR area, call + sin_map instead of sin_remap if the sindir isn't already mapped. + Also set np->allocsize *before* calling sin_map, but *after* + calling sin_remap, to meet the requirements of those separate + routines. + + * sizes.c (diskfs_grow): If END isn't bigger than NP->allocsize, + then don't try and do anything. In computation of LBN and the + first use of NB, round up to block boundary correctly. Don't + attempt to realloc an old block if the size is 0 (in which case + NB is -1 and unsigned comparison rules might foul things up). + +Mon May 23 13:18:33 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * Makefile (ufs): Give -n in the proper order to rsh. + + * main.c: Include <hurd/startup.h>. + + * ufs.h (DONT_CACHE_MEMORY_OBJECTS): New compilation flag. + * pager.c (pager_report_attributes): Deleted function. + (MAY_CACHE): New macro; more useful form for using + DONT_CACHE_MEMORY_OBJECTS. + (sin_map, pager_init, diskfs_get_filemap): Provide new + args in calls to pager_create. + * sizes.c (MAY_CACHE): New macro; more useful form for + using DONT_CACHE_MEMORY_OBJECTS. + (diskfs_truncate): Use MAY_CACHE in calls to pager_change_attributes. + +Fri May 20 18:52:41 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * sizes.c (diskfs_truncate): Force any delayed copies of the + vanishing region to be taken immediately before stopping, and + prevent any new delayed copies from being made until we are done + manipulating things. + (poke_pages): New function. + * pager.c (pager_report_attributes): New function. 
+ +Wed May 18 15:51:40 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * alloc.c (alloc, realloccg, diskfs_alloc_node, alloccgblk, + blkfree, diskfs_free_node, mapsearch): Added helpful strings to + asserts. + (realloccg): Split up assert. + +Tue May 17 13:26:22 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * main.c (main): Delete unused variable PROC. + +Mon May 16 15:32:07 1994 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * alloc.c (realloccg): When fragextend succeeds, bother to set + *PBN. + + * sizes.c (diskfs_grow): In fragment growth case, NSIZE should + not be the amount to hold SIZE (SIZE is the amount the file is + growing by), but rather the old size of the fragment plus the + SIZE. + + * dir.c (diskfs_direnter case COMPRESS): Rewrite loop to deal + properly with the case where from and to overlap. + +Mon May 9 16:51:44 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * main.c (ufs_version): New variable. + (save_argv): New variable. + (main): Set save_argv. + (diskfs_init_completed): New function. + +Thu May 5 19:06:54 1994 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * Makefile (exec_server_image.o): Use -n when calling rsh. + +Thu May 5 07:39:38 1994 Roland McGrath (roland@churchy.gnu.ai.mit.edu) + + * Makefile ($(OBJS)): Use $(includedir) instead of $(headers) in deps. diff --git a/ufs/Makefile b/ufs/Makefile new file mode 100644 index 00000000..02cf38ba --- /dev/null +++ b/ufs/Makefile @@ -0,0 +1,32 @@ +# Makefile for ufs +# +# Copyright (C) 1994,95,96,99,2000,02 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2, or (at +# your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +dir := ufs +makemode := server + +target = ufs +SRCS = alloc.c consts.c dir.c hyper.c inode.c main.c pager.c \ + sizes.c subr.c tables.c bmap.c pokeloc.c +LCLHDRS = ufs.h fs.h dinode.h dir.h + +OBJS = $(SRCS:.c=.o) +HURDLIBS = diskfs iohelp fshelp store pager threads ports ihash shouldbeinlibc + +include ../Makeconf + +ufs.static: $(boot-store-types:%=../libstore/libstore_%.a) diff --git a/ufs/alloc.c b/ufs/alloc.c new file mode 100644 index 00000000..48ee60cc --- /dev/null +++ b/ufs/alloc.c @@ -0,0 +1,1703 @@ +/* Disk allocation routines + Copyright (C) 1993,94,95,96,98,2002 Free Software Foundation, Inc. + +This file is part of the GNU Hurd. + +The GNU Hurd is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +The GNU Hurd is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the GNU Hurd; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Modified from UCB by Michael I. Bushnell. */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 + */ + +#include "ufs.h" +#include <stdio.h> +#include <string.h> + + +/* These don't work *at all* here; don't even try setting them. 
*/ +#undef DIAGNOSTIC +#undef QUOTA + +extern u_long nextgennumber; + +spin_lock_t alloclock = SPIN_LOCK_INITIALIZER; + +/* Forward declarations */ +static u_long ffs_hashalloc (struct node *, int, long, int, + u_long (*)(struct node *, int, daddr_t, int)); +static u_long ffs_alloccg (struct node *, int, daddr_t, int); +static daddr_t ffs_fragextend (struct node *, int, long, int, int); +static ino_t ffs_dirpref (struct fs *); +static u_long ffs_nodealloccg (struct node *, int, daddr_t, int); +static daddr_t ffs_alloccgblk (struct fs *, struct cg *, daddr_t); +static daddr_t ffs_mapsearch (struct fs *, struct cg *, daddr_t, int); +static void ffs_clusteracct (struct fs *, struct cg *, daddr_t, int); + +/* Sync all allocation information and nod eNP if diskfs_synchronous. */ +inline void +alloc_sync (struct node *np) +{ + if (diskfs_synchronous) + { + if (np) + diskfs_node_update (np, 1); + copy_sblock (); + diskfs_set_hypermetadata (1, 0); + sync_disk (1); + } +} + +/* Byteswap everything in CGP. 
*/ +void +swab_cg (struct cg *cg) +{ + int i, j; + + if (swab_long (cg->cg_magic) == CG_MAGIC + || cg->cg_magic == CG_MAGIC) + { + cg->cg_magic = swab_long (cg->cg_magic); + cg->cg_time = swab_long (cg->cg_time); + cg->cg_cgx = swab_long (cg->cg_cgx); + cg->cg_ncyl = swab_short (cg->cg_ncyl); + cg->cg_niblk = swab_short (cg->cg_niblk); + cg->cg_cs.cs_ndir = swab_long (cg->cg_cs.cs_ndir); + cg->cg_cs.cs_nbfree = swab_long (cg->cg_cs.cs_nbfree); + cg->cg_cs.cs_nifree = swab_long (cg->cg_cs.cs_nifree); + cg->cg_cs.cs_nffree = swab_long (cg->cg_cs.cs_nffree); + cg->cg_rotor = swab_long (cg->cg_rotor); + cg->cg_irotor = swab_long (cg->cg_irotor); + for (i = 0; i < MAXFRAG; i++) + cg->cg_frsum[i] = swab_long (cg->cg_frsum[i]); + cg->cg_btotoff = swab_long (cg->cg_btotoff); + cg->cg_boff = swab_long (cg->cg_boff); + cg->cg_iusedoff = swab_long (cg->cg_iusedoff); + cg->cg_freeoff = swab_long (cg->cg_freeoff); + cg->cg_nextfreeoff = swab_long (cg->cg_nextfreeoff); + cg->cg_clustersumoff = swab_long (cg->cg_clustersumoff); + cg->cg_clusteroff = swab_long (cg->cg_clusteroff); + cg->cg_nclusterblks = swab_long (cg->cg_nclusterblks); + + /* blktot map */ + for (i = 0; i < cg->cg_ncyl; i++) + cg_blktot(cg)[i] = swab_long (cg_blktot(cg)[i]); + + /* blks map */ + for (i = 0; i < cg->cg_ncyl; i++) + for (j = 0; j < sblock->fs_nrpos; j++) + cg_blks(sblock, cg, i)[j] = swab_short (cg_blks (sblock, cg, i)[j]); + + for (i = 0; i < sblock->fs_contigsumsize; i++) + cg_clustersum(cg)[i] = swab_long (cg_clustersum(cg)[i]); + + /* inosused, blksfree, and cg_clustersfree are char arrays */ + } + else + { + /* Old format cylinder group... 
*/ + struct ocg *ocg = (struct ocg *) cg; + + if (swab_long (ocg->cg_magic) != CG_MAGIC + && ocg->cg_magic != CG_MAGIC) + return; + + ocg->cg_time = swab_long (ocg->cg_time); + ocg->cg_cgx = swab_long (ocg->cg_cgx); + ocg->cg_ncyl = swab_short (ocg->cg_ncyl); + ocg->cg_niblk = swab_short (ocg->cg_niblk); + ocg->cg_ndblk = swab_long (ocg->cg_ndblk); + ocg->cg_cs.cs_ndir = swab_long (ocg->cg_cs.cs_ndir); + ocg->cg_cs.cs_nbfree = swab_long (ocg->cg_cs.cs_nbfree); + ocg->cg_cs.cs_nifree = swab_long (ocg->cg_cs.cs_nifree); + ocg->cg_cs.cs_nffree = swab_long (ocg->cg_cs.cs_nffree); + ocg->cg_rotor = swab_long (ocg->cg_rotor); + ocg->cg_frotor = swab_long (ocg->cg_frotor); + ocg->cg_irotor = swab_long (ocg->cg_irotor); + for (i = 0; i < 8; i++) + ocg->cg_frsum[i] = swab_long (ocg->cg_frsum[i]); + for (i = 0; i < 32; i++) + ocg->cg_btot[i] = swab_long (ocg->cg_btot[i]); + for (i = 0; i < 32; i++) + for (j = 0; j < 8; j++) + ocg->cg_b[i][j] = swab_short (ocg->cg_b[i][j]); + ocg->cg_magic = swab_long (ocg->cg_magic); + } +} + + +/* Read cylinder group indexed CG. Set *CGPP to point at it. + Return 1 if caller should call release_cgp when we're done with it; + otherwise zero. */ +int +read_cg (int cg, struct cg **cgpp) +{ + struct cg *diskcg = cg_locate (cg); + + if (swab_disk) + { + *cgpp = malloc (sblock->fs_cgsize); + bcopy (diskcg, *cgpp, sblock->fs_cgsize); + swab_cg (*cgpp); + return 1; + } + else + { + *cgpp = diskcg; + return 0; + } +} + +/* Caller of read_cg is done with cg; write it back to disk (swapping it + along the way) and free the memory allocated in read_cg. */ +void +release_cg (struct cg *cgp) +{ + int cgx = cgp->cg_cgx; + swab_cg (cgp); + bcopy (cgp, cg_locate (cgx), sblock->fs_cgsize); + free (cgp); +} + + +/* + * Allocate a block in the file system. + * + * The size of the requested block is given, which must be some + * multiple of fs_fsize and <= fs_bsize. + * A preference may be optionally specified. 
If a preference is given + * the following hierarchy is used to allocate a block: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate a block in the same cylinder group. + * 4) quadradically rehash into other cylinder groups, until an + * available block is located. + * If no block preference is given the following heirarchy is used + * to allocate a block: + * 1) allocate a block in the cylinder group that contains the + * inode for the file. + * 2) quadradically rehash into other cylinder groups, until an + * available block is located. + */ +error_t +ffs_alloc(register struct node *np, + daddr_t lbn, + daddr_t bpref, + int size, + daddr_t *bnp, + struct protid *cred) +{ + register struct fs *fs; + daddr_t bno; + int cg; + + *bnp = 0; + fs = sblock; +#ifdef DIAGNOSTIC + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + panic("ffs_alloc: bad size"); + } + assert (cred); +#endif /* DIAGNOSTIC */ + spin_lock (&alloclock); + if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) + goto nospace; + if (cred && !idvec_contains (cred->user->uids, 0) + && freespace(fs, fs->fs_minfree) <= 0) + goto nospace; +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(size), cred, 0)) + return (error); +#endif + if (bpref >= fs->fs_size) + bpref = 0; + if (bpref == 0) + cg = ino_to_cg(fs, np->dn->number); + else + cg = dtog(fs, bpref); + bno = (daddr_t)ffs_hashalloc(np, cg, (long)bpref, size, + (u_long (*)())ffs_alloccg); + if (bno > 0) { + spin_unlock (&alloclock); + np->dn_stat.st_blocks += btodb(size); + np->dn_set_ctime = 1; + np->dn_set_mtime = 1; + *bnp = bno; + alloc_sync (np); + return (0); + } +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. 
+ */ + (void) chkdq(ip, (long)-btodb(size), cred, FORCE); +#endif +nospace: + spin_unlock (&alloclock); + printf ("file system full"); +/* ffs_fserr(fs, cred->cr_uid, "file system full"); */ +/* uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); */ + return (ENOSPC); +} + +/* + * Reallocate a fragment to a bigger size + * + * The number and size of the old block is given, and a preference + * and new size is also specified. The allocator attempts to extend + * the original block. Failing that, the regular block allocator is + * invoked to get an appropriate block. + */ +error_t +ffs_realloccg(register struct node *np, + daddr_t lbprev, + volatile daddr_t bpref, + int osize, + int nsize, + daddr_t *pbn, + struct protid *cred) +{ + register struct fs *fs; + int cg, error; + volatile int request; + daddr_t bprev, bno; + + *pbn = 0; + fs = sblock; +#ifdef DIAGNOSTIC + if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || + (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { + printf( + "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); + panic("ffs_realloccg: bad size"); + } + if (cred == NOCRED) + panic("ffs_realloccg: missing credential\n"); +#endif /* DIAGNOSTIC */ + + spin_lock (&alloclock); + + if (!idvec_contains (cred->user->uids, 0) + && freespace(fs, fs->fs_minfree) <= 0) + goto nospace; + error = diskfs_catch_exception (); + if (error) + return error; + bprev = read_disk_entry ((dino (np->dn->number))->di_db[lbprev]); + diskfs_end_catch_exception (); + assert ("old block not allocated" && bprev); + +#if 0 /* Not needed in GNU Hurd ufs */ + /* + * Allocate the extra space in the buffer. 
+ */ + if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) { + brelse(bp); + return (error); + } +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) { + brelse(bp); + return (error); + } +#endif +#endif /* 0 */ + + /* + * Check for extension in the existing location. + */ + cg = dtog(fs, bprev); + bno = ffs_fragextend(np, cg, (long)bprev, osize, nsize); + if (bno) { + assert (bno == bprev); + spin_unlock (&alloclock); + np->dn_stat.st_blocks += btodb(nsize - osize); + np->dn_set_ctime = 1; + np->dn_set_mtime = 1; + *pbn = bno; +#if 0 /* Not done this way in GNU Hurd ufs. */ + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + bzero((char *)bp->b_data + osize, (u_int)nsize - osize); + *bpp = bp; +#endif + alloc_sync (np); + return (0); + } + /* + * Allocate a new disk location. + */ + if (bpref >= fs->fs_size) + bpref = 0; + switch ((int)fs->fs_optim) { + case FS_OPTSPACE: + /* + * Allocate an exact sized fragment. Although this makes + * best use of space, we will waste time relocating it if + * the file continues to grow. If the fragmentation is + * less than half of the minimum free reserve, we choose + * to begin optimizing for time. + */ + request = nsize; + if (fs->fs_minfree < 5 || + fs->fs_cstotal.cs_nffree > + fs->fs_dsize * fs->fs_minfree / (2 * 100)) + break; + printf ("%s: optimization changed from SPACE to TIME\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTTIME; + break; + case FS_OPTTIME: + /* + * At this point we have discovered a file that is trying to + * grow a small fragment to a larger fragment. To save time, + * we allocate a full sized block, then free the unused portion. + * If the file continues to grow, the `ffs_fragextend' call + * above will be able to grow it in place without further + * copying. If aberrant programs cause disk fragmentation to + * grow within 2% of the free reserve, we choose to begin + * optimizing for space. 
+ */ + request = fs->fs_bsize; + if (fs->fs_cstotal.cs_nffree < + fs->fs_dsize * (fs->fs_minfree - 2) / 100) + break; + printf ("%s: optimization changed from TIME to SPACE\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTSPACE; + break; + default: + assert (0); + /* NOTREACHED */ + } + bno = (daddr_t)ffs_hashalloc(np, cg, (long)bpref, request, + (u_long (*)())ffs_alloccg); + if (bno > 0) { +#if 0 /* Not necessary in GNU Hurd ufs */ + bp->b_blkno = fsbtodb(fs, bno); + (void) vnode_pager_uncache(ITOV(ip)); +#endif +/* Commented out here for Hurd; we don't want to free this until we've + saved the old contents. Callers are responsible for freeing the + block when they are done with it. */ +/* ffs_blkfree(np, bprev, (long)osize); */ + if (nsize < request) + ffs_blkfree(np, bno + numfrags(fs, nsize), + (long)(request - nsize)); + spin_unlock (&alloclock); + np->dn_stat.st_blocks += btodb(nsize - osize); + np->dn_set_mtime = 1; + np->dn_set_ctime = 1; + *pbn = bno; +#if 0 /* Not done this way in GNU Hurd ufs */ + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + bzero((char *)bp->b_data + osize, (u_int)nsize - osize); + *bpp = bp; +#endif /* 0 */ + alloc_sync (np); + return (0); + } +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); +#endif +#if 0 /* Not necesarry in GNU Hurd ufs */ + brelse(bp); +#endif +nospace: + /* + * no space available + */ + spin_unlock (&alloclock); + printf ("file system full"); +/* ffs_fserr(fs, cred->cr_uid, "file system full"); */ +/* uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); */ + return (ENOSPC); +} + +#if 0 /* Not used (yet?) in GNU Hurd ufs */ +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. 
The allocator attempts + * to find a range of sequential blocks starting as close as possible to + * an fs_rotdelay offset from the end of the allocation for the logical + * block immediately preceeding the current range. If successful, the + * physical block numbers in the buffer pointers and in the inode are + * changed to reflect the new allocation. If unsuccessful, the allocation + * is left unchanged. The success in doing the reallocation is returned. + * Note that the error return is not reflected back to the user. Rather + * the previous block allocation will be used. + */ +#include <sys/sysctl.h> +int doasyncfree = 1; +struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +int +ffs_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, len, start_lvl, end_lvl, pref, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef DIAGNOSTIC + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ffs_reallocblks: non-cluster"); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. 
+ */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. + */ + pref = ffs_blkpref(ip, start_lbn, soff, sbap); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ffs_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) + goto fail; + ebap = (daddr_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, + len, (u_long (*)())ffs_clusteralloc)) == 0) + goto fail; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { + if (i == ssize) + bap = ebap; +#ifdef DIAGNOSTIC + if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) + panic("ffs_reallocblks: alloc mismatch"); +#endif + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. 
+ * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + VOP_UPDATE(vp, &time, &time, MNT_WAIT); + } + if (ssize < len) + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + } + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_db[0]) + brelse(sbp); + return (ENOSPC); +} +#endif /* 0 */ + +/* + * Allocate an inode in the file system. + * + * If allocating a directory, use ffs_dirpref to select the inode. + * If allocating in a directory, the following hierarchy is followed: + * 1) allocate the preferred inode. + * 2) allocate an inode in the same cylinder group. + * 3) quadradically rehash into other cylinder groups, until an + * available inode is located. + * If no inode preference is given the following heirarchy is used + * to allocate an inode: + * 1) allocate an inode in cylinder group 0. + * 2) quadradically rehash into other cylinder groups, until an + * available inode is located. 
+ */
/* This is the diskfs_alloc_node callback from the diskfs library
   (described in <hurd/diskfs.h>).  It used to be ffs_valloc in BSD.

   Allocate a free inode for a new file of type MODE and store its node
   in *NPP.  For a directory the preferred inode comes from ffs_dirpref;
   for anything else the inode number of DIR is used as the preference.

   Returns 0 on success, ENOSPC when no free inode is available, or the
   error reported by diskfs_cached_lookup when the node for the newly
   allocated inode cannot be obtained (in which case *NPP is not set).  */
error_t
diskfs_alloc_node (struct node *dir,
		   mode_t mode,
		   struct node **npp)
{
	register struct fs *fs;
	struct node *np;
	ino_t ino, ipref;
	int cg, error;
	int now;

	fs = sblock;

	spin_lock (&alloclock);

	/* Cheap early exit: the superblock summary says nothing is free.  */
	if (fs->fs_cstotal.cs_nifree == 0)
	  {
	    spin_unlock (&alloclock);
	    goto noinodes;
	  }

	if (S_ISDIR (mode))
		ipref = ffs_dirpref(fs);
	else
		ipref = dir->dn->number;

	/* An out-of-range preference is treated as "no preference".  */
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	ino = (ino_t)ffs_hashalloc(dir, cg, (long)ipref,
				   mode, ffs_nodealloccg);
	spin_unlock (&alloclock);
	if (ino == 0)
		goto noinodes;

	error = diskfs_cached_lookup (ino, &np);
	if (error)
		/* NP is not valid on this path, so none of the checks below
		   may run.  NOTE(review): the inode remains marked in-use in
		   the cylinder group map here; confirm whether it should be
		   handed back before returning.  */
		return error;

	assert ("duplicate allocation" && !np->dn_stat.st_mode);
	assert (! (np->dn_stat.st_mode & S_IPTRANS));
	if (np->dn_stat.st_blocks) {
		/* A free inode should own no blocks; report and repair.  */
		printf("free inode %Ld had %Ld blocks\n",
		       ino, np->dn_stat.st_blocks);
		np->dn_stat.st_blocks = 0;
		np->dn_set_ctime = 1;
	}
	np->dn_stat.st_flags = 0;
	/*
	 * Set up a new generation number for this inode.  The counter is
	 * bumped on every allocation and is never allowed to fall behind
	 * the current value of diskfs_mtime->seconds.
	 */
	spin_lock (&gennumberlock);
	now = diskfs_mtime->seconds;
	if (++nextgennumber < (u_long)now)
		nextgennumber = now;
	np->dn_stat.st_gen = nextgennumber;
	spin_unlock (&gennumberlock);

	*npp = np;
	alloc_sync (np);
	return (0);
noinodes:
	printf ("out of inodes");
/*	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); */
/*	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);*/
	return (ENOSPC);
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
+ */
/* The value returned is fs_ipg times the index of the chosen cylinder
   group; group 0 is returned when no group qualifies.  */
static ino_t
ffs_dirpref(register struct fs *fs)
{
	int best_cg = 0;		/* group selected so far */
	int fewest_dirs = fs->fs_ipg;	/* directory count a candidate must beat */
	int mean_ifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	int g;

	for (g = 0; g < fs->fs_ncg; g++) {
		/* Only groups with at least the average number of free
		   inodes are candidates.  */
		if (csum[g].cs_nifree < mean_ifree)
			continue;
		/* Strict comparison: on a tie the earlier group is kept.  */
		if (csum[g].cs_ndir >= fewest_dirs)
			continue;
		best_cg = g;
		fewest_dirs = csum[g].cs_ndir;
	}
	return ((ino_t)(fs->fs_ipg * best_cg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
+ */ +daddr_t +ffs_blkpref(struct node *np, + daddr_t lbn, + int indx, + daddr_t *bap) +{ + register struct fs *fs; + register int cg; + int avgbfree, startcg; + daddr_t nextblk; + + fs = sblock; + spin_lock (&alloclock); + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = ino_to_cg(fs, np->dn->number); + spin_unlock (&alloclock); + return (fs->fs_fpg * cg + fs->fs_frag); + } + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = + (ino_to_cg(fs, np->dn->number) + + lbn / fs->fs_maxbpg); + else + startcg = dtog(fs, + read_disk_entry (bap[indx - 1])) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (csum[cg].cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + spin_unlock (&alloclock); + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= startcg; cg++) + if (csum[cg].cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + spin_unlock (&alloclock); + return (fs->fs_fpg * cg + fs->fs_frag); + } + spin_unlock (&alloclock); + return 0; + } + spin_unlock (&alloclock); + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. + */ + nextblk = read_disk_entry (bap[indx - 1]) + fs->fs_frag; + if (indx < fs->fs_maxcontig + || (read_disk_entry (bap[indx - fs->fs_maxcontig]) + + blkstofrags(fs, fs->fs_maxcontig) != nextblk)) + { + return (nextblk); + } + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. 
+ */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); +} + +/* + * Implement the cylinder overflow algorithm. + * + * The policy implemented by this algorithm is: + * 1) allocate the block in its requested cylinder group. + * 2) quadradically rehash on the cylinder group number. + * 3) brute force search for a free block. + */ +/*VARARGS5*/ +static u_long +ffs_hashalloc(struct node *np, + int cg, + long pref, + int size, /* size for data blocks, mode for inodes */ + u_long (*allocator)()) +{ + register struct fs *fs; + long result; + int i, icg = cg; + + fs = sblock; + /* + * 1: preferred cylinder group + */ + result = (*allocator)(np, cg, pref, size); + if (result) + return (result); + /* + * 2: quadratic rehash + */ + for (i = 1; i < fs->fs_ncg; i *= 2) { + cg += i; + if (cg >= fs->fs_ncg) + cg -= fs->fs_ncg; + result = (*allocator)(np, cg, 0, size); + if (result) + return (result); + } + /* + * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. + */ + cg = (icg + 2) % fs->fs_ncg; + for (i = 2; i < fs->fs_ncg; i++) { + result = (*allocator)(np, cg, 0, size); + if (result) + return (result); + cg++; + if (cg == fs->fs_ncg) + cg = 0; + } + return 0; +} + +/* + * Determine whether a fragment can be extended. + * + * Check to see if the necessary fragments are available, and + * if they are, allocate them. 
+ */ +static daddr_t +ffs_fragextend(struct node *np, + int cg, + long bprev, + int osize, + int nsize) +{ + register struct fs *fs; + struct cg *cgp; + long bno; + int frags, bbase; + int i; + int releasecg; + + fs = sblock; + if (csum[cg].cs_nffree < numfrags(fs, nsize - osize)) + return 0; + frags = numfrags(fs, nsize); + bbase = fragnum(fs, bprev); + if (bbase > fragnum(fs, (bprev + frags - 1))) { + /* cannot extend across a block boundary */ + return 0; + } +#if 0 /* Wrong for GNU Hurd ufs */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; +#else + releasecg = read_cg (cg, &cgp); +#endif + if (!cg_chkmagic(cgp)) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return 0; + } + cgp->cg_time = diskfs_mtime->seconds; + bno = dtogd(fs, bprev); + for (i = numfrags(fs, osize); i < frags; i++) + if (isclr(cg_blksfree(cgp), bno + i)) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return 0; + } + /* + * the current fragment can be extended + * deduct the count on fragment being extended into + * increase the count on the remaining fragment (if any) + * allocate the extended piece + */ + for (i = frags; i < fs->fs_frag - bbase; i++) + if (isclr(cg_blksfree(cgp), bno + i)) + break; + cgp->cg_frsum[i - numfrags(fs, osize)]--; + if (i != frags) + cgp->cg_frsum[i - frags]++; + for (i = numfrags(fs, osize); i < frags; i++) { + clrbit(cg_blksfree(cgp), bno + i); + cgp->cg_cs.cs_nffree--; + fs->fs_cstotal.cs_nffree--; + csum[cg].cs_nffree--; + } + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; + fs->fs_fmod = 1; +/* bdwrite(bp); */ + return (bprev); +} + +/* + * Determine whether a block can be allocated. + * + * Check to see if a block of the appropriate size is available, + * and if it is, allocate it. 
+ */ +static u_long +ffs_alloccg(struct node *np, + int cg, + daddr_t bpref, + int size) +{ + register struct fs *fs; + struct cg *cgp; + register int i; + int bno, frags, allocsiz; + int releasecg; + + fs = sblock; + if (csum[cg].cs_nbfree == 0 && size == fs->fs_bsize) + return 0; +#if 0 /* Not this way in GNU Hurd ufs */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; +#else + releasecg = read_cg (cg, &cgp); +#endif + if (!cg_chkmagic(cgp) || + (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return 0; + } + cgp->cg_time = diskfs_mtime->seconds; + if (size == fs->fs_bsize) { + bno = ffs_alloccgblk(fs, cgp, bpref); +/* bdwrite(bp); */ + if (releasecg) + release_cg (cgp); + return (bno); + } + /* + * check to see if any fragments are already available + * allocsiz is the size which will be allocated, hacking + * it down to a smaller size if necessary + */ + frags = numfrags(fs, size); + for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) + if (cgp->cg_frsum[allocsiz] != 0) + break; + if (allocsiz == fs->fs_frag) { + /* + * no fragments were available, so a block will be + * allocated, and hacked up + */ + if (cgp->cg_cs.cs_nbfree == 0) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return 0; + } + bno = ffs_alloccgblk(fs, cgp, bpref); + bpref = dtogd(fs, bno); + for (i = frags; i < fs->fs_frag; i++) + setbit(cg_blksfree(cgp), bpref + i); + i = fs->fs_frag - frags; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + csum[cg].cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; +/* bdwrite(bp); */ + return (bno); + } + bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); + if (bno < 0) { +/* brelse(bp); */ + if (releasecg) + 
release_cg (cgp); + return 0; + } + for (i = 0; i < frags; i++) + clrbit(cg_blksfree(cgp), bno + i); + cgp->cg_cs.cs_nffree -= frags; + fs->fs_cstotal.cs_nffree -= frags; + csum[cg].cs_nffree -= frags; + fs->fs_fmod = 1; + cgp->cg_frsum[allocsiz]--; + if (frags != allocsiz) + cgp->cg_frsum[allocsiz - frags]++; + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; +/* bdwrite(bp); */ + return (cg * fs->fs_fpg + bno); +} + +/* + * Allocate a block in a cylinder group. + * + * This algorithm implements the following policy: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate the next available block on the block rotor for the + * specified cylinder group. + * Note that this routine only allocates fs_bsize blocks; these + * blocks may be fragmented by the routine that allocates them. + */ +static daddr_t +ffs_alloccgblk(register struct fs *fs, + register struct cg *cgp, + daddr_t bpref) +{ + daddr_t bno, blkno; + int cylno, pos, delta; + short *cylbp; + register int i; + + if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { + bpref = cgp->cg_rotor; + goto norot; + } + bpref = blknum(fs, bpref); + bpref = dtogd(fs, bpref); + /* + * if the requested block is available, use it + */ + if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { + bno = bpref; + goto gotit; + } + /* + * check for a block available on the same cylinder + */ + cylno = cbtocylno(fs, bpref); + if (cg_blktot(cgp)[cylno] == 0) + goto norot; + if (fs->fs_cpc == 0) { + /* + * Block layout information is not available. + * Leaving bpref unchanged means we take the + * next available free block following the one + * we just allocated. Hopefully this will at + * least hit a track cache on drives of unknown + * geometry (e.g. SCSI). 
+ */ + goto norot; + } + /* + * check the summary information to see if a block is + * available in the requested cylinder starting at the + * requested rotational position and proceeding around. + */ + cylbp = cg_blks(fs, cgp, cylno); + pos = cbtorpos(fs, bpref); + for (i = pos; i < fs->fs_nrpos; i++) + if (cylbp[i] > 0) + break; + if (i == fs->fs_nrpos) + for (i = 0; i < pos; i++) + if (cylbp[i] > 0) + break; + if (cylbp[i] > 0) { + /* + * found a rotational position, now find the actual + * block. A panic if none is actually there. + */ + pos = cylno % fs->fs_cpc; + bno = (cylno - pos) * fs->fs_spc / NSPB(fs); + assert (fs_postbl(fs, pos)[i] != -1); + for (i = fs_postbl(fs, pos)[i];; ) { + if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { + bno = blkstofrags(fs, (bno + i)); + goto gotit; + } + delta = fs_rotbl(fs)[i]; + if (delta <= 0 || + delta + i > fragstoblks(fs, fs->fs_fpg)) + break; + i += delta; + } + printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); + assert (0); + } +norot: + /* + * no blocks in the requested cylinder, so take next + * available one in this cylinder group. + */ + bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); + if (bno < 0) + return 0; + cgp->cg_rotor = bno; +gotit: + blkno = fragstoblks(fs, bno); + ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); + ffs_clusteracct(fs, cgp, blkno, -1); + cgp->cg_cs.cs_nbfree--; + fs->fs_cstotal.cs_nbfree--; + csum[cgp->cg_cgx].cs_nbfree--; + cylno = cbtocylno(fs, bno); + cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; + cg_blktot(cgp)[cylno]--; + fs->fs_fmod = 1; + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; + return (cgp->cg_cgx * fs->fs_fpg + bno); +} + +#if 0 /* Not needed in GNU Hurd ufs (yet?) */ +/* + * Determine whether a cluster can be allocated. + * + * We do not currently check for optimal rotational layout if there + * are multiple choices in the same cylinder group. Instead we just + * take the first one that we find following bpref. 
+ */ +static daddr_t +ffs_clusteralloc(ip, cg, bpref, len) + struct inode *ip; + int cg; + daddr_t bpref; + int len; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + int i, run, bno, bit, map; + u_char *mapp; + + fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nbfree < len) + return (NULL); + if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + NOCRED, &bp)) + goto fail; + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) + goto fail; + /* + * Check to see if a cluster of the needed size (or bigger) is + * available in this cylinder group. + */ + for (i = len; i <= fs->fs_contigsumsize; i++) + if (cg_clustersum(cgp)[i] > 0) + break; + if (i > fs->fs_contigsumsize) + goto fail; + /* + * Search the cluster map to find a big enough cluster. + * We take the first one that we find, even if it is larger + * than we need as we prefer to get one close to the previous + * block allocation. We do not search before the current + * preference point as we do not want to allocate a block + * that is allocated before the previous one (as we will + * then have to wait for another pass of the elevator + * algorithm before it will be read). We prefer to fail and + * be recalled to try an allocation in the next cylinder group. + */ + if (dtog(fs, bpref) != cg) + bpref = 0; + else + bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); + mapp = &cg_clustersfree(cgp)[bpref / NBBY]; + map = *mapp++; + bit = 1 << (bpref % NBBY); + for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) { + if ((map & bit) == 0) { + run = 0; + } else { + run++; + if (run == len) + break; + } + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + if (i == cgp->cg_nclusterblks) + goto fail; + /* + * Allocate the cluster that we have found. 
+ */ + bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1); + len = blkstofrags(fs, len); + for (i = 0; i < len; i += fs->fs_frag) + if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i) + panic("ffs_clusteralloc: lost block"); + brelse(bp); + return (bno); + +fail: + brelse(bp); + return (0); +} +#endif + +/* + * Determine whether an inode can be allocated. + * + * Check to see if an inode is available, and if it is, + * allocate it using the following policy: + * 1) allocate the requested inode. + * 2) allocate the next available inode after the requested + * inode in the specified cylinder group. + */ +static u_long +ffs_nodealloccg(struct node *np, + int cg, + daddr_t ipref, + int mode) +{ + register struct fs *fs; + struct cg *cgp; + int start, len, loc, map, i; + int releasecg; + + fs = sblock; + if (csum[cg].cs_nifree == 0) + return 0; +#if 0 /* Not this way in GNU Hurd ufs */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; +#else + releasecg = read_cg (cg, &cgp); +#endif + if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return 0; + } + cgp->cg_time = diskfs_mtime->seconds; + if (ipref) { + ipref %= fs->fs_ipg; + if (isclr(cg_inosused(cgp), ipref)) + goto gotit; + } + start = cgp->cg_irotor / NBBY; + len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); + loc = skpc(0xff, len, &cg_inosused(cgp)[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, &cg_inosused(cgp)[0]); + assert (loc != 0); + } + i = start + len - loc; + map = cg_inosused(cgp)[i]; + ipref = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { + if ((map & i) == 0) { + cgp->cg_irotor = ipref; + goto gotit; + } + } + assert (0); + /* NOTREACHED */ +gotit: + setbit(cg_inosused(cgp), ipref); + cgp->cg_cs.cs_nifree--; + fs->fs_cstotal.cs_nifree--; + csum[cg].cs_nifree--; + 
fs->fs_fmod = 1; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir++; + fs->fs_cstotal.cs_ndir++; + csum[cg].cs_ndir++; + } + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; +/* bdwrite(bp); */ + return (cg * fs->fs_ipg + ipref); +} + +/* + * Free a block or fragment. + * + * The specified block or fragment is placed back in the + * free map. If a fragment is deallocated, a possible + * block reassembly is checked. + */ +void +ffs_blkfree(register struct node *np, + daddr_t bno, + long size) +{ + register struct fs *fs; + struct cg *cgp; + daddr_t blkno; + int i, cg, blk, frags, bbase; + int releasecg; + + fs = sblock; + assert ((u_int)size <= fs->fs_bsize && !fragoff (fs, size)); + cg = dtog(fs, bno); + if ((u_int)bno >= fs->fs_size) { + printf("bad block %ld, ino %Ld\n", bno, np->dn->number); +/* ffs_fserr(fs, ip->i_uid, "bad block"); */ + return; + } +#if 0 /* Not this way in GNU Hurd ufs */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return; + } + cgp = (struct cg *)bp->b_data; +#else + releasecg = read_cg (cg, &cgp); +#endif + if (!cg_chkmagic(cgp)) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return; + } + cgp->cg_time = diskfs_mtime->seconds; + bno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + blkno = fragstoblks(fs, bno); + assert (!ffs_isblock(fs, cg_blksfree (cgp), blkno)); + ffs_setblock(fs, cg_blksfree(cgp), blkno); + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + csum[cg].cs_nbfree++; + i = cbtocylno(fs, bno); + cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; + cg_blktot(cgp)[i]++; + } else { + bbase = bno - fragnum(fs, bno); + /* + * decrement the counts associated with the old frags + */ + blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * deallocate the fragment + */ + frags = numfrags(fs, size); + 
for (i = 0; i < frags; i++) { + assert (!isset (cg_blksfree(cgp), bno + i)); + setbit(cg_blksfree(cgp), bno + i); + } + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + csum[cg].cs_nffree += i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* + * if a complete block has been reassembled, account for it + */ + blkno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + fs->fs_cstotal.cs_nffree -= fs->fs_frag; + csum[cg].cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + csum[cg].cs_nbfree++; + i = cbtocylno(fs, bbase); + cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; + cg_blktot(cgp)[i]++; + } + } + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; + fs->fs_fmod = 1; + alloc_sync (np); +/* bdwrite(bp); */ +} + +/* + * Free an inode. + * + * The specified inode is placed back in the free map. + */ +/* Implement diskfs call back diskfs_free_node (described in + <hurd/diskfs.h>. This was called ffs_vfree in BSD. 
*/ +void +diskfs_free_node (struct node *np, mode_t mode) +{ + register struct fs *fs; + struct cg *cgp; + ino_t ino = np->dn->number; + int cg; + int releasecg; + + fs = sblock; + assert (ino < fs->fs_ipg * fs->fs_ncg); + cg = ino_to_cg(fs, ino); +#if 0 /* Not this way in GNU Hurd ufs */ + error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (0); + } + cgp = (struct cg *)bp->b_data; +#else + releasecg = read_cg (cg, &cgp); +#endif + if (!cg_chkmagic(cgp)) { +/* brelse(bp); */ + if (releasecg) + release_cg (cgp); + return; + } + cgp->cg_time = diskfs_mtime->seconds; + ino %= fs->fs_ipg; + if (isclr(cg_inosused(cgp), ino)) { +/* printf("dev = 0x%x, ino = %Ld, fs = %s\n", + pip->i_dev, ino, fs->fs_fsmnt); */ + assert (diskfs_readonly); + } + clrbit(cg_inosused(cgp), ino); + if (ino < cgp->cg_irotor) + cgp->cg_irotor = ino; + cgp->cg_cs.cs_nifree++; + fs->fs_cstotal.cs_nifree++; + csum[cg].cs_nifree++; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir--; + fs->fs_cstotal.cs_ndir--; + csum[cg].cs_ndir--; + } + if (releasecg) + release_cg (cgp); + record_poke (cgp, sblock->fs_cgsize); + csum_dirty = 1; + sblock_dirty = 1; + fs->fs_fmod = 1; + alloc_sync (np); +/* bdwrite(bp); */ +} + +/* + * Find a block of the specified size in the specified cylinder group. + * + * It is a panic if a request is made to find a block if none are + * available. 
+ */ +static daddr_t +ffs_mapsearch(register struct fs *fs, + register struct cg *cgp, + daddr_t bpref, + int allocsiz) +{ + daddr_t bno; + int start, len, loc, i; + int blk, field, subfield, pos; + + /* + * find the fragment by searching through the free block + * map for an appropriate bit pattern + */ + if (bpref) + start = dtogd(fs, bpref) / NBBY; + else + start = cgp->cg_frotor / NBBY; + len = howmany(fs->fs_fpg, NBBY) - start; + loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + if (loc == 0) { + len = start + 1; + start = 0; + loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + assert (loc); + + } + bno = (start + len - loc) * NBBY; + cgp->cg_frotor = bno; + /* + * found the byte in the map + * sift through the bits to find the selected frag + */ + for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { + blk = blkmap(fs, cg_blksfree(cgp), bno); + blk <<= 1; + field = around[allocsiz]; + subfield = inside[allocsiz]; + for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { + if ((blk & field) == subfield) + return (bno + pos); + field <<= 1; + subfield <<= 1; + } + } + assert (0); + return (-1); +} + +/* + * Update the cluster map because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +static void +ffs_clusteracct(struct fs *fs, + struct cg *cgp, + daddr_t blkno, + int cnt) +{ + long *sump; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. + */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. 
+ */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. + */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; +} + +#if 0 +/* + * Fserr prints the name of a file system with an error diagnostic. + * + * The form of the error message is: + * fs: error message + */ +static void +ffs_fserr(fs, uid, cp) + struct fs *fs; + u_int uid; + char *cp; +{ + + log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); +} +#endif diff --git a/ufs/bmap.c b/ufs/bmap.c new file mode 100644 index 00000000..1a138f39 --- /dev/null +++ b/ufs/bmap.c @@ -0,0 +1,120 @@ +/* Interpretation of indirect block structure + Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + Written by Michael I. Bushnell. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. 
+ + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" + +/* For logical block number LBN of file NP, look it the block address, + giving the "path" of indirect blocks to the file, starting + with the least indirect. Fill *INDIRS with information for + the block. */ +error_t +fetch_indir_spec (struct node *np, volatile daddr_t lbn, + struct iblock_spec *indirs) +{ + struct dinode *di = dino (np->dn->number); + error_t err; + daddr_t *siblock; + + err = diskfs_catch_exception (); + if (err) + return err; + + indirs[0].offset = -2; + indirs[1].offset = -2; + indirs[2].offset = -2; + indirs[3].offset = -2; + + if (lbn < NDADDR) + { + if (lbn >= 0) + { + indirs[0].bno = read_disk_entry (di->di_db[lbn]); + indirs[0].offset = -1; + } + + diskfs_end_catch_exception (); + return 0; + } + + lbn -= NDADDR; + + indirs[0].offset = lbn % NINDIR (sblock); + + if (lbn / NINDIR (sblock)) + { + /* We will use the double indirect block */ + int ibn; + daddr_t *diblock; + + ibn = lbn / NINDIR (sblock) - 1; + + indirs[1].offset = ibn % NINDIR (sblock); + + /* We don't support triple indirect blocks, but this + is where we'd do it. 
*/ + assert (!(ibn / NINDIR (sblock))); + + indirs[2].offset = -1; + indirs[2].bno = read_disk_entry (di->di_ib[INDIR_DOUBLE]); + + if (indirs[2].bno) + { + diblock = indir_block (indirs[2].bno); + indirs[1].bno = read_disk_entry (diblock[indirs[1].offset]); + } + else + indirs[1].bno = 0; + } + else + { + indirs[1].offset = -1; + indirs[1].bno = read_disk_entry (di->di_ib[INDIR_SINGLE]); + } + + if (indirs[1].bno) + { + siblock = indir_block (indirs[1].bno); + indirs[0].bno = read_disk_entry (siblock[indirs[0].offset]); + } + else + indirs[0].bno = 0; + + diskfs_end_catch_exception (); + return 0; +} + + +/* Mark indirect block BNO as dirty on node NP's list. NP must + be locked. */ +void +mark_indir_dirty (struct node *np, daddr_t bno) +{ + struct dirty_indir *d; + + for (d = np->dn->dirty; d; d = d->next) + if (d->bno == bno) + return; + + d = malloc (sizeof (struct dirty_indir)); + d->bno = bno; + d->next = np->dn->dirty; + np->dn->dirty = d; +} + diff --git a/ufs/consts.c b/ufs/consts.c new file mode 100644 index 00000000..69221233 --- /dev/null +++ b/ufs/consts.c @@ -0,0 +1,33 @@ +/* Various constants wanted by the diskfs library + Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include "ufs.h" +#include "dir.h" +#include <version.h> + +int diskfs_link_max = LINK_MAX; +int diskfs_name_max = MAXNAMLEN; +int diskfs_maxsymlinks = 8; +int diskfs_shortcut_symlink = 1; +int diskfs_shortcut_chrdev = 1; +int diskfs_shortcut_blkdev = 1; +int diskfs_shortcut_fifo = 1; +int diskfs_shortcut_ifsock = 1; +char *diskfs_server_name = "ufs"; +char *diskfs_server_version = HURD_VERSION; +char *diskfs_extra_version = "GNU Hurd"; +int diskfs_synchronous = 0; diff --git a/ufs/dinode.h b/ufs/dinode.h new file mode 100644 index 00000000..00be0d94 --- /dev/null +++ b/ufs/dinode.h @@ -0,0 +1,137 @@ +/* + Copyright (C) 1994 Free Software Foundation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dinode.h 8.3 (Berkeley) 1/21/94 + */ + +/* + * The root inode is the root of the file system. Inode 0 can't be used for + * normal purposes and historically bad blocks were linked to inode 1, thus + * the root inode is 2. (Inode 1 is no longer used for this purpose, however + * numerous dump tapes make this assumption, so we are stuck with it). 
+ */ +#define ROOTINO ((ino_t)2) + +/* + * A dinode contains all the meta-data associated with a UFS file. + * This structure defines the on-disk format of a dinode. + */ + +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. */ + +/* Maximum value of di_nlink field. */ +#define LINK_MAX 32767 + +/* Indexes into di_ib */ +#define INDIR_SINGLE 0 +#define INDIR_DOUBLE 1 +#define INDIR_TRIPLE 2 /* NOT SUPPORTED */ + +struct dinode { + u_short di_model; /* 0: IFMT and permissions. */ + short di_nlink; /* 2: File link count. */ + union + { + u_long diu_author; /* 4: File author */ + u_short diu_oldids[2]; /* Old format uid and gid */ + } di_u; + u_quad_t di_size; /* 8: File byte count. */ + struct timespec di_atime; /* 16: Last access time. */ + struct timespec di_mtime; /* 24: Last modified time. */ + struct timespec di_ctime; /* 32: Last inode change time. */ + daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ + daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ + u_long di_flags; /* 100: Status flags (chflags). */ + long di_blocks; /* 104: Blocks actually held. */ + long di_gen; /* 108: Generation number. */ + u_long di_uid; /* 112: File owner. */ + u_long di_gid; /* 116: File group. */ + u_short di_modeh; /* 120: Mode high bits */ + u_short di_spare; /* 122: unused */ + long di_trans; /* 124: filesystem translator */ +}; + +#define di_author di_u.diu_author /* GNU extension */ +#define di_ouid di_u.diu_oldids[0] +#define di_ogid di_u.diu_oldids[1] + +/* + * The di_db fields may be overlaid with other information for + * file types that do not have associated disk storage. Block + * and character devices overlay the first data block with their + * dev_t value. Short symbolic links place their path in the + * di_db area. + */ +#define di_rdev di_db[0] +#define di_shortlink di_db +#define MAXSYMLINKLEN ((NDADDR + NIADDR) * sizeof(daddr_t)) + +/* File modes. */ +#define IEXEC 0000100 /* Executable. 
*/ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +/* File types. */ +#define IFMT 0170000 /* Mask of file type. */ +#define IFIFO 0010000 /* Named pipe (fifo). */ +#define IFCHR 0020000 /* Character device. */ +#define IFDIR 0040000 /* Directory file. */ +#define IFBLK 0060000 /* Block device. */ +#define IFREG 0100000 /* Regular file. */ +#define IFLNK 0120000 /* Symbolic link. */ +#define IFSOCK 0140000 /* UNIX domain socket. */ diff --git a/ufs/dir.c b/ufs/dir.c new file mode 100644 index 00000000..7a8cfa55 --- /dev/null +++ b/ufs/dir.c @@ -0,0 +1,988 @@ +/* Directory management routines + + Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2007 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" +#include "dir.h" + +#include <string.h> +#include <stdio.h> +#include <dirent.h> + +#undef d_ino + +enum slot_status +{ + /* This means we haven't yet found room for a new entry. */ + LOOKING, + + /* This means that the specified entry is free and should be used. */ + TAKE, + + /* This means that the specified entry has enough room at the end + to hold the new entry. 
*/ + SHRINK, + + /* This means that there is enough space in the block, but not in + any one single entry, so they all have to be shifted to make + room. */ + COMPRESS, + + /* This means that the directory will have to be grown to hold the + entry. */ + EXTEND, + + /* For removal and rename, this means that this is the location + of the entry found. */ + HERE_TIS, +}; + +struct dirstat +{ + /* Type of followp operation expected */ + enum lookup_type type; + + /* One of the statuses above */ + enum slot_status stat; + + /* Mapped address and length of directory */ + vm_address_t mapbuf; + vm_size_t mapextent; + + /* Index of this directory block. */ + int idx; + + /* For stat COMPRESS, this is the address (inside mapbuf) + of the first direct in the directory block to be compressed. */ + /* For stat HERE_TIS, SHRINK, and TAKE, this is the entry referenced. */ + struct directory_entry *entry; + + /* For stat HERE_TIS, type REMOVE, this is the address of the immediately + previous direct in this directory block, or zero if this is the first. */ + struct directory_entry *preventry; + + /* For stat COMPRESS, this is the number of bytes needed to be copied + in order to undertake the compression. */ + size_t nbytes; +}; + +const size_t diskfs_dirstat_size = sizeof (struct dirstat); + +/* Initialize DS such that diskfs_drop_dirstat will ignore it. */ +void +diskfs_null_dirstat (struct dirstat *ds) +{ + ds->type = LOOKUP; +} + +static error_t +dirscanblock (vm_address_t blockoff, struct node *dp, int idx, + const char *name, int namelen, enum lookup_type type, + struct dirstat *ds, ino_t *inum); + +/* Implement the diskfs_lookup from the diskfs library. See + <hurd/diskfs.h> for the interface specification. 
*/
/* Implementation notes.

   The directory is mapped (with DIRBLKSIZ of slack in case the caller
   later has to EXTEND it) and scanned one DIRBLKSIZ block at a time
   with dirscanblock, starting at the block where the previous lookup
   hit (DP->dn->dir_idx) and wrapping around once.

   Returns 0 and stores the node in *NPP (when NPP is non-null) if NAME
   exists, ENOENT if it does not, ENAMETOOLONG if NAME is longer than
   MAXNAMLEN, EAGAIN for ".." in inode 2, or the error reported by the
   mapping calls, dirscanblock or diskfs_cached_lookup.

   When DS is non-null and ends up with a type other than LOOKUP, the
   mapping is handed over to DS (mapbuf/mapextent); otherwise it is
   unmapped before returning.

   Looking up ".." without SPEC_DOTDOT drops DP's lock, fetches the
   parent, retakes the lock and then repeats the whole scan to confirm
   that ".." still names the same inode.  */
error_t
diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type,
                    struct node **npp, struct dirstat *ds, struct protid *cred)
{
  error_t err;
  ino_t inum;
  int namelen;
  int spec_dotdot;
  struct node *np = 0;
  /* Inode number ".." resolved to on the previous pass; 0 on the
     first pass.  */
  ino_t retry_dotdot = 0;
  memory_object_t memobj;
  vm_prot_t prot =
    (type == LOOKUP) ? VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE);
  vm_address_t buf = 0;
  vm_size_t buflen = 0;
  /* Address of the block being scanned; kept as an address type so it
     is not truncated where int is narrower than a pointer.  */
  vm_address_t blockaddr;
  int idx, lastidx;
  int looped;

  if ((type == REMOVE) || (type == RENAME))
    assert (npp);

  if (npp)
    *npp = 0;

  spec_dotdot = type & SPEC_DOTDOT;
  type &= ~SPEC_DOTDOT;

  namelen = strlen (name);

  if (namelen > MAXNAMLEN)
    {
      if (ds)
        diskfs_null_dirstat (ds);
      return ENAMETOOLONG;
    }

 try_again:
  if (ds)
    {
      ds->type = LOOKUP;
      ds->mapbuf = 0;
      ds->mapextent = 0;
    }
  if (buf)
    {
      munmap ((caddr_t) buf, buflen);
      buf = 0;
    }
  if (ds && (type == CREATE || type == RENAME))
    ds->stat = LOOKING;

  /* Map in the directory contents. */
  memobj = diskfs_get_filemap (dp, prot);

  if (memobj == MACH_PORT_NULL)
    {
      err = errno;
      goto fail;
    }

  buf = 0;
  /* We allow extra space in case we have to do an EXTEND. */
  buflen = round_page (dp->dn_stat.st_size + DIRBLKSIZ);
  err = vm_map (mach_task_self (),
                &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
  mach_port_deallocate (mach_task_self (), memobj);
  if (err)
    {
      /* Nothing was mapped, so there is nothing to scan or unmap.  */
      buf = 0;
      goto fail;
    }

  inum = 0;

  diskfs_set_node_atime (dp);

  /* Start the lookup at DP->dn->dir_idx.  Block IDX only exists if it
     begins strictly before the end of the directory.  */
  idx = dp->dn->dir_idx;
  if (idx * DIRBLKSIZ >= dp->dn_stat.st_size)
    idx = 0;                    /* just in case */
  blockaddr = buf + idx * DIRBLKSIZ;
  looped = (idx == 0);
  lastidx = idx;
  if (lastidx == 0)
    lastidx = dp->dn_stat.st_size / DIRBLKSIZ;

  while (!looped || idx < lastidx)
    {
      err = dirscanblock (blockaddr, dp, idx, name, namelen, type, ds, &inum);
      if (!err)
        {
          dp->dn->dir_idx = idx;
          break;
        }
      if (err != ENOENT)
        {
          munmap ((caddr_t) buf, buflen);
          goto fail;
        }

      blockaddr += DIRBLKSIZ;
      idx++;
      if (blockaddr - buf >= dp->dn_stat.st_size && !looped)
        {
          /* We've gotten to the end; start back at the beginning */
          looped = 1;
          blockaddr = buf;
          idx = 0;
        }
    }

  diskfs_set_node_atime (dp);
  if (diskfs_synchronous)
    diskfs_node_update (dp, 1);

  /* If err is set here, it's ENOENT, and we don't want to
     think about that as an error yet. */
  err = 0;

  if (inum && npp)
    {
      if (namelen != 2 || name[0] != '.' || name[1] != '.')
        {
          if (inum == dp->dn->number)
            {
              np = dp;
              diskfs_nref (np);
            }
          else
            {
              err = diskfs_cached_lookup (inum, &np);
              if (err)
                goto out;
            }
        }

      /* We are looking up .. */
      /* Check to see if this is the root of the filesystem. */
      else if (dp->dn->number == 2)
        {
          err = EAGAIN;
          goto out;
        }

      /* We can't just do diskfs_cached_lookup, because we would then deadlock.
         So we do this.  Ick.  */
      else if (retry_dotdot)
        {
          /* Check to see that we got the same answer as last time. */
          if (inum != retry_dotdot)
            {
              /* Drop what we *thought* was .. (but isn't any more) and
                 try *again*.  Forget the pointer as well, so that the
                 cleanup at `out' cannot release it a second time should
                 the lookup below fail without storing a node.  */
              diskfs_nput (np);
              np = 0;
              mutex_unlock (&dp->lock);
              err = diskfs_cached_lookup (inum, &np);
              mutex_lock (&dp->lock);
              if (err)
                goto out;
              retry_dotdot = inum;
              goto try_again;
            }
          /* Otherwise, we got it fine and np is already set properly. */
        }
      else if (!spec_dotdot)
        {
          /* Lock them in the proper order, and then
             repeat the directory scan to see if this is still
             right.  */
          mutex_unlock (&dp->lock);
          err = diskfs_cached_lookup (inum, &np);
          mutex_lock (&dp->lock);
          if (err)
            goto out;
          retry_dotdot = inum;
          goto try_again;
        }

      /* Here below are the spec dotdot cases. */
      else if (type == RENAME || type == REMOVE)
        np = ifind (inum);

      else if (type == LOOKUP)
        {
          diskfs_nput (dp);
          err = diskfs_cached_lookup (inum, &np);
          if (err)
            goto out;
        }
      else
        assert (0);
    }

  if ((type == CREATE || type == RENAME) && !inum && ds && ds->stat == LOOKING)
    {
      /* We didn't find any room, so mark ds to extend the dir */
      ds->type = CREATE;
      ds->stat = EXTEND;
      ds->idx = dp->dn_stat.st_size / DIRBLKSIZ;
    }

  /* Return to the user; if we can't, release the reference
     (and lock) we acquired above.  */
 out:
  /* Deallocate or save the mapping. */
  if ((err && err != ENOENT)
      || !ds
      || ds->type == LOOKUP)
    {
      munmap ((caddr_t) buf, buflen);
      if (ds)
        ds->type = LOOKUP;      /* set to be ignored by drop_dirstat */
    }
  else
    {
      ds->mapbuf = buf;
      ds->mapextent = buflen;
    }

  if (np)
    {
      assert (npp);
      if (err)
        {
          if (!spec_dotdot)
            {
              /* Normal case */
              if (np == dp)
                diskfs_nrele (np);
              else
                diskfs_nput (np);
            }
          else if (type == RENAME || type == REMOVE)
            /* We just did ifind to get np; that allocates
               no new references, so we don't have anything to do */
            ;
          else if (type == LOOKUP)
            /* We did diskfs_cached_lookup */
            diskfs_nput (np);
        }
      else
        *npp = np;
    }

  return err ? : inum ? 0 : ENOENT;

 fail:
  /* The directory could not be mapped or scanned.  No mapping is left
     and DS (if any) is still marked LOOKUP.  NP is only non-null here
     on a ".." retry pass, where it was returned by
     diskfs_cached_lookup and has to be given back.  */
  if (np)
    diskfs_nput (np);
  return err;
}

/* Scan block at address BLKADDR (of node DP; block index IDX), for
   name NAME of length NAMELEN.  Args TYPE, DS are as for
   diskfs_lookup.  If found, set *INUM to the inode number, else
   return ENOENT.
*/
/* Implementation notes.

   While DS is still LOOKING for room (or only holds a COMPRESS
   candidate) every entry is also measured for free space:
     - an entry with at least DIRSIZ (NAMELEN) spare bytes makes DS a
       TAKE (unused entry) or SHRINK (used entry with slack) slot;
     - otherwise the spare bytes are summed, and if the block as a whole
       has enough, it becomes a COMPRESS candidate, replacing an earlier
       candidate only when fewer bytes would have to be copied.

   A malformed entry is reported on stderr and ends the scan of this
   block with ENOENT.

   When the whole block was scanned without a match, the number of live
   entries is recorded in DP->dn->dirents[IDX]; that array is only a
   cache, so failing to allocate it is not an error.  */
static error_t
dirscanblock (vm_address_t blockaddr, struct node *dp, int idx,
              const char *name, int namelen, enum lookup_type type,
              struct dirstat *ds, ino_t *inum)
{
  int nfree = 0;
  int needed = 0;
  vm_address_t currentoff, prevoff;
  struct directory_entry *entry = 0;
  int nentries = 0;
  size_t nbytes = 0;
  int looking = 0;
  int countcopies = 0;
  int consider_compress = 0;

  if (ds && (ds->stat == LOOKING
             || ds->stat == COMPRESS))
    {
      looking = 1;
      countcopies = 1;
      needed = DIRSIZ (namelen);
    }

  for (currentoff = blockaddr, prevoff = 0;
       currentoff < blockaddr + DIRBLKSIZ;
       prevoff = currentoff, currentoff += read_disk_entry (entry->d_reclen))
    {
      entry = (struct directory_entry *) currentoff;

      /* Sanity-check the entry before trusting its record length.  */
      if (!entry->d_reclen
          || read_disk_entry (entry->d_reclen) % 4
          || DIRECT_NAMLEN (entry) > MAXNAMLEN
          || (currentoff + read_disk_entry (entry->d_reclen)
              > blockaddr + DIRBLKSIZ)
          || entry->d_name[DIRECT_NAMLEN (entry)]
          || DIRSIZ (DIRECT_NAMLEN (entry)) > read_disk_entry (entry->d_reclen)
          || memchr (entry->d_name, '\0', DIRECT_NAMLEN (entry)))
        {
          fprintf (stderr, "Bad directory entry: inode: %Ld offset: %zd\n",
                   dp->dn->number, currentoff - blockaddr + idx * DIRBLKSIZ);
          return ENOENT;
        }

      if (looking || countcopies)
        {
          int thisfree;

          /* Count how much free space this entry has in it. */
          if (entry->d_ino == 0)
            thisfree = read_disk_entry (entry->d_reclen);
          else
            thisfree = (read_disk_entry (entry->d_reclen)
                        - DIRSIZ (DIRECT_NAMLEN (entry)));

          /* If this isn't at the front of the block, then it will
             have to be copied if we do a compression; count the
             number of bytes there too. */
          if (countcopies && currentoff != blockaddr)
            nbytes += DIRSIZ (DIRECT_NAMLEN (entry));

          if (ds->stat == COMPRESS && nbytes > ds->nbytes)
            /* The previously found compress is better than
               this one, so don't bother counting any more. */
            countcopies = 0;

          if (thisfree >= needed)
            {
              ds->type = CREATE;
              ds->stat = read_disk_entry (entry->d_ino) == 0 ? TAKE : SHRINK;
              ds->entry = entry;
              ds->idx = idx;
              looking = countcopies = 0;
            }
          else
            {
              nfree += thisfree;
              if (nfree >= needed)
                consider_compress = 1;
            }
        }

      if (entry->d_ino)
        nentries++;

      if (DIRECT_NAMLEN (entry) == namelen
          && entry->d_name[0] == name[0]
          && entry->d_ino
          && !memcmp (entry->d_name, name, namelen))
        break;
    }

  /* LOOKING and COMPRESS are slot statuses, so the decision has to be
     made on DS->stat; DS->type holds an enum lookup_type value.
     CONSIDER_COMPRESS can only be set while DS is non-null.  */
  if (consider_compress
      && (ds->stat == LOOKING
          || (ds->stat == COMPRESS && ds->nbytes > nbytes)))
    {
      ds->type = CREATE;
      ds->stat = COMPRESS;
      ds->entry = (struct directory_entry *) blockaddr;
      ds->idx = idx;
      ds->nbytes = nbytes;
    }

  if (currentoff >= blockaddr + DIRBLKSIZ)
    {
      int i;
      /* The name is not in this block. */

      /* Because we scanned the entire block, we should write
         down how many entries there were. */
      if (!dp->dn->dirents)
        {
          dp->dn->dirents = malloc ((dp->dn_stat.st_size / DIRBLKSIZ)
                                    * sizeof (int));
          if (dp->dn->dirents)
            for (i = 0; i < dp->dn_stat.st_size / DIRBLKSIZ; i++)
              dp->dn->dirents[i] = -1;
        }

      /* Without memory for the cache we simply do not record the
         count; a null DIRENTS means nothing is being counted.  */
      if (dp->dn->dirents)
        {
          /* Make sure the count is correct if there is one now. */
          assert (dp->dn->dirents[idx] == -1
                  || dp->dn->dirents[idx] == nentries);
          dp->dn->dirents[idx] = nentries;
        }

      return ENOENT;
    }

  /* We have found the required name. */

  if (ds && type == CREATE)
    ds->type = LOOKUP;          /* it's invalid now */
  else if (ds && (type == REMOVE || type == RENAME))
    {
      ds->type = type;
      ds->stat = HERE_TIS;
      ds->entry = entry;
      ds->idx = idx;
      ds->preventry = (struct directory_entry *) prevoff;
    }

  *inum = read_disk_entry (entry->d_ino);
  return 0;
}

/* Following a lookup call for CREATE, this adds a node to a directory.
   DP is the directory to be modified; NAME is the name to be entered;
   NP is the node being linked in; DS is the cached information returned
   by lookup; CRED describes the user making the call.
This call may + only be made if the directory has been held locked continuously since + the preceding lookup call, and only if that call returned ENOENT. */ +error_t +diskfs_direnter_hard(struct node *dp, + const char *name, + struct node *np, + struct dirstat *ds, + struct protid *cred) +{ + struct directory_entry *new; + int namelen = strlen (name); + int needed = DIRSIZ (namelen); + int oldneeded; + vm_address_t fromoff, tooff; + int totfreed; + error_t err; + size_t oldsize = 0; + + assert (ds->type == CREATE); + + dp->dn_set_mtime = 1; + + switch (ds->stat) + { + case TAKE: + /* We are supposed to consume this slot. */ + assert (ds->entry->d_ino == 0 + && read_disk_entry (ds->entry->d_reclen) >= needed); + + write_disk_entry (ds->entry->d_ino, np->dn->number); + DIRECT_NAMLEN (ds->entry) = namelen; + if (direct_symlink_extension) + ds->entry->d_type = IFTODT (np->dn_stat.st_mode); + bcopy (name, ds->entry->d_name, namelen + 1); + + break; + + case SHRINK: + /* We are supposed to take the extra space at the end + of this slot. */ + oldneeded = DIRSIZ (DIRECT_NAMLEN (ds->entry)); + assert (read_disk_entry (ds->entry->d_reclen) - oldneeded >= needed); + + new = (struct directory_entry *) ((vm_address_t) ds->entry + oldneeded); + + write_disk_entry (new->d_ino, np->dn->number); + write_disk_entry (new->d_reclen, + read_disk_entry (ds->entry->d_reclen) - oldneeded); + DIRECT_NAMLEN (new) = namelen; + if (direct_symlink_extension) + new->d_type = IFTODT (np->dn_stat.st_mode); + bcopy (name, new->d_name, namelen + 1); + + write_disk_entry (ds->entry->d_reclen, oldneeded); + + break; + + case COMPRESS: + /* We are supposed to move all the entries to the + front of the block, giving each the minimum + necessary room. This should free up enough space + for the new entry. 
*/ + fromoff = tooff = (vm_address_t) ds->entry; + + while (fromoff < (vm_address_t) ds->entry + DIRBLKSIZ) + { + struct directory_entry *from = (struct directory_entry *)fromoff; + struct directory_entry *to = (struct directory_entry *) tooff; + int fromreclen = read_disk_entry (from->d_reclen); + + if (from->d_ino != 0) + { + assert (fromoff >= tooff); + + bcopy (from, to, fromreclen); + write_disk_entry (to->d_reclen, DIRSIZ (DIRECT_NAMLEN (to))); + + tooff += read_disk_entry (to->d_reclen); + } + fromoff += fromreclen; + } + + totfreed = (vm_address_t) ds->entry + DIRBLKSIZ - tooff; + assert (totfreed >= needed); + + new = (struct directory_entry *) tooff; + write_disk_entry (new->d_ino, np->dn->number); + write_disk_entry (new->d_reclen, totfreed); + DIRECT_NAMLEN (new) = namelen; + if (direct_symlink_extension) + new->d_type = IFTODT (np->dn_stat.st_mode); + bcopy (name, new->d_name, namelen + 1); + break; + + case EXTEND: + /* Extend the file. */ + assert (needed <= DIRBLKSIZ); + + oldsize = dp->dn_stat.st_size; + if ((off_t)(oldsize + DIRBLKSIZ) != dp->dn_stat.st_size + DIRBLKSIZ) + { + /* We can't possibly map the whole directory in. 
*/ + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + return EOVERFLOW; + } + while (oldsize + DIRBLKSIZ > dp->allocsize) + { + err = diskfs_grow (dp, oldsize + DIRBLKSIZ, cred); + if (err) + { + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + return err; + } + } + + new = (struct directory_entry *) (ds->mapbuf + oldsize); + + dp->dn_stat.st_size = oldsize + DIRBLKSIZ; + dp->dn_set_ctime = 1; + + write_disk_entry (new->d_ino, np->dn->number); + write_disk_entry (new->d_reclen, DIRBLKSIZ); + DIRECT_NAMLEN (new) = namelen; + if (direct_symlink_extension) + new->d_type = IFTODT (np->dn_stat.st_mode); + bcopy (name, new->d_name, namelen + 1); + break; + + default: + assert (0); + } + + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + if (ds->stat != EXTEND) + { + /* If we are keeping count of this block, then keep the count up + to date. */ + if (dp->dn->dirents && dp->dn->dirents[ds->idx] != -1) + dp->dn->dirents[ds->idx]++; + } + else + { + int i; + /* It's cheap, so start a count here even if we aren't counting + anything at all. */ + if (dp->dn->dirents) + { + dp->dn->dirents = realloc (dp->dn->dirents, + (dp->dn_stat.st_size / DIRBLKSIZ + * sizeof (int))); + for (i = oldsize / DIRBLKSIZ; + i < dp->dn_stat.st_size / DIRBLKSIZ; + i++) + dp->dn->dirents[i] = -1; + + dp->dn->dirents[ds->idx] = 1; + } + else + { + dp->dn->dirents = malloc (dp->dn_stat.st_size / DIRBLKSIZ + * sizeof (int)); + for (i = 0; i < dp->dn_stat.st_size / DIRBLKSIZ; i++) + dp->dn->dirents[i] = -1; + dp->dn->dirents[ds->idx] = 1; + } + } + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Following a lookup call for REMOVE, this removes the link from the + directory. DP is the directory being changed and DS is the cached + information returned from lookup. This call is only valid if the + directory has been locked continously since the call to lookup, and + only if that call succeeded. 
*/ +error_t +diskfs_dirremove_hard(struct node *dp, + struct dirstat *ds) +{ + assert (ds->type == REMOVE); + assert (ds->stat == HERE_TIS); + + dp->dn_set_mtime = 1; + + if (ds->preventry == 0) + ds->entry->d_ino = 0; + else + { + assert ((vm_address_t) ds->entry - (vm_address_t) ds->preventry + == read_disk_entry (ds->preventry->d_reclen)); + write_disk_entry (ds->preventry->d_reclen, + (read_disk_entry (ds->preventry->d_reclen) + + read_disk_entry (ds->entry->d_reclen))); + } + + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + /* If we are keeping count of this block, then keep the count up + to date. */ + if (dp->dn->dirents && dp->dn->dirents[ds->idx] != -1) + dp->dn->dirents[ds->idx]--; + + diskfs_file_update (dp, 1); + + return 0; +} + + +/* Following a lookup call for RENAME, this changes the inode number + on a directory entry. DP is the directory being changed; NP is + the new node being linked in; DP is the cached information returned + by lookup. This call is only valid if the directory has been locked + continuously since the call to lookup, and only if that call + succeeded. */ +error_t +diskfs_dirrewrite_hard(struct node *dp, + struct node *np, + struct dirstat *ds) +{ + assert (ds->type == RENAME); + assert (ds->stat == HERE_TIS); + + dp->dn_set_mtime = 1; + write_disk_entry (ds->entry->d_ino, np->dn->number); + if (direct_symlink_extension) + ds->entry->d_type = IFTODT (np->dn_stat.st_mode); + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Tell if DP is an empty directory (has only "." and ".." entries). */ +/* This routine must be called from inside a catch_exception (). 
*/ +int +diskfs_dirempty(struct node *dp, + struct protid *cred) +{ + struct directory_entry *entry; + vm_address_t buf, curoff; + memory_object_t memobj; + error_t err; + + memobj = diskfs_get_filemap (dp, VM_PROT_READ); + + if (memobj == MACH_PORT_NULL) + /* XXX should reflect error properly */ + return 0; + + buf = 0; + + err = vm_map (mach_task_self (), &buf, dp->dn_stat.st_size, 0, + 1, memobj, 0, 0, VM_PROT_READ, VM_PROT_READ, 0); + mach_port_deallocate (mach_task_self (), memobj); + assert (!err); + + diskfs_set_node_atime (dp); + + for (curoff = buf; + curoff < buf + dp->dn_stat.st_size; + curoff += read_disk_entry (entry->d_reclen)) + { + entry = (struct directory_entry *) curoff; + + if (entry->d_ino != 0 + && (DIRECT_NAMLEN (entry) > 2 + || entry->d_name[0] != '.' + || (entry->d_name[1] != '.' + && entry->d_name[1] != '\0'))) + { + munmap ((caddr_t) buf, dp->dn_stat.st_size); + diskfs_set_node_atime (dp); + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + return 0; + } + } + diskfs_set_node_atime (dp); + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + munmap ((caddr_t) buf, dp->dn_stat.st_size); + return 1; +} + +/* Make DS an invalid dirstat. */ +error_t +diskfs_drop_dirstat (struct node *dp, struct dirstat *ds) +{ + if (ds->type != LOOKUP) + { + assert (ds->mapbuf); + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + ds->type = LOOKUP; + } + return 0; +} + + +/* Count the entries in directory block NB for directory DP and + write the answer down in its dirents array. As a side affect + fill BUF with the block. 
*/ +static error_t +count_dirents (struct node *dp, int nb, char *buf) +{ + size_t amt; + char *offinblk; + struct directory_entry *entry; + int count = 0; + error_t err; + + assert (dp->dn->dirents); + assert ((nb + 1) * DIRBLKSIZ <= dp->dn_stat.st_size); + + err = diskfs_node_rdwr (dp, buf, nb * DIRBLKSIZ, DIRBLKSIZ, 0, 0, &amt); + if (err) + return err; + assert (amt == DIRBLKSIZ); + + for (offinblk = buf; + offinblk < buf + DIRBLKSIZ; + offinblk += read_disk_entry (entry->d_reclen)) + { + entry = (struct directory_entry *) offinblk; + if (entry->d_ino) + count++; + } + + assert (dp->dn->dirents[nb] == -1 || dp->dn->dirents[nb] == count); + dp->dn->dirents[nb] = count; + return 0; +} + +/* Implement the disikfs_get_directs callback as described in + <hurd/diskfs.h>. */ +error_t +diskfs_get_directs (struct node *dp, + int entry, + int nentries, + char **data, + size_t *datacnt, + vm_size_t bufsiz, + int *amt) +{ + int blkno; + int nblks; + int curentry; + char buf[DIRBLKSIZ]; + char *bufp; + int bufvalid; + error_t err; + int i; + char *datap; + struct directory_entry *entryp; + int allocsize; + size_t checklen; + struct dirent *userp; + + nblks = dp->dn_stat.st_size/DIRBLKSIZ; + + if (!dp->dn->dirents) + { + dp->dn->dirents = malloc (nblks * sizeof (int)); + for (i = 0; i < nblks; i++) + dp->dn->dirents[i] = -1; + } + + /* Scan through the entries to find ENTRY. If we encounter + a -1 in the process then stop to fill it. When we run + off the end, ENTRY is too big. */ + curentry = 0; + bufvalid = 0; + for (blkno = 0; blkno < nblks; blkno++) + { + if (dp->dn->dirents[blkno] == -1) + { + err = count_dirents (dp, blkno, buf); + if (err) + return err; + bufvalid = 1; + } + + if (curentry + dp->dn->dirents[blkno] > entry) + /* ENTRY starts in this block. */ + break; + + curentry += dp->dn->dirents[blkno]; + + bufvalid = 0; + } + + if (blkno == nblks) + { + /* We reached the end of the directory without seeing ENTRY. 
+ This is treated as an EOF condition, meaning we return + success with empty results. */ + *datacnt = 0; + *amt = 0; + return 0; + } + + /* Allocate enough space to hold the maximum we might return */ + if (!bufsiz || bufsiz > dp->dn_stat.st_size) + allocsize = round_page (dp->dn_stat.st_size); + else + allocsize = round_page (bufsiz); + + if (allocsize > *datacnt) + *data = mmap (0, allocsize, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0); + + /* Set bufp appropriately */ + bufp = buf; + if (curentry != entry) + { + /* Look through the block to find out where to start, + setting bufp appropriately. */ + if (!bufvalid) + { + err = diskfs_node_rdwr (dp, buf, blkno * DIRBLKSIZ, DIRBLKSIZ, + 0, 0, &checklen); + if (err) + return err; + assert (checklen == DIRBLKSIZ); + bufvalid = 1; + } + for (i = 0, bufp = buf; + i < entry - curentry && bufp - buf < DIRBLKSIZ; + (bufp + += read_disk_entry (((struct directory_entry *)bufp)->d_reclen)), + i++) + ; + /* Make sure we didn't run off the end. */ + assert (bufp - buf < DIRBLKSIZ); + } + + i = 0; + datap = *data; + + /* Copy the entries, one at a time. */ + while (((nentries == -1) || (i < nentries)) + && (!bufsiz || (datap - *data < bufsiz) ) + && blkno < nblks) + { + if (!bufvalid) + { + err = diskfs_node_rdwr (dp, buf, blkno * DIRBLKSIZ, DIRBLKSIZ, + 0, 0, &checklen); + if (err) + return err; + assert (checklen == DIRBLKSIZ); + bufvalid = 1; + bufp = buf; + } + + entryp = (struct directory_entry *)bufp; + + if (entryp->d_ino) + { + userp = (struct dirent *) datap; + + userp->d_fileno = read_disk_entry (entryp->d_ino); + userp->d_reclen = DIRSIZ (DIRECT_NAMLEN (entryp)); + userp->d_namlen = DIRECT_NAMLEN (entryp); + bcopy (entryp->d_name, userp->d_name, DIRECT_NAMLEN (entryp) + 1); + userp->d_type = DT_UNKNOWN; /* until fixed */ + i++; + datap += DIRSIZ (DIRECT_NAMLEN (entryp)); + } + + bufp += read_disk_entry (entryp->d_reclen); + if (bufp - buf == DIRBLKSIZ) + { + blkno++; + bufvalid = 0; + } + } + + /* We've copied all we can. 
If we allocated our own array + but didn't fill all of it, then free whatever memory we didn't use. */ + if (allocsize > *datacnt) + { + if (round_page (datap - *data) < allocsize) + munmap (*data + round_page (datap - *data), + allocsize - round_page (datap - *data)); + } + + /* Set variables for return */ + *datacnt = datap - *data; + *amt = i; + return 0; +} diff --git a/ufs/dir.h b/ufs/dir.h new file mode 100644 index 00000000..5730ef44 --- /dev/null +++ b/ufs/dir.h @@ -0,0 +1,163 @@ +/* Modified from BSD by Michael I. Bushnell for GNU Hurd ufs server. */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dir.h 8.2 (Berkeley) 1/21/94 + */ + +#ifndef _DIR_H_ +#define _DIR_H_ + +#include <endian.h> + +/* + * A directory consists of some number of blocks of DIRBLKSIZ + * bytes, where DIRBLKSIZ is chosen such that it can be transferred + * to disk in a single atomic operation (e.g. 512 bytes on most machines). + * + * Each DIRBLKSIZ byte block contains some number of directory entry + * structures, which are of variable length. Each directory entry has + * a struct direct at the front of it, containing its inode number, + * the length of the entry, and the length of the name contained in + * the entry. These are followed by the name padded to a 4 byte boundary + * with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is MAXNAMLEN. + * + * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent + * a directory entry. Free space in a directory is represented by + * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes + * in a directory block are claimed by the directory entries. This + * usually results in the last entry in a directory having a large + * dp->d_reclen. 
When entries are deleted from a directory, the + * space is returned to the previous entry in the same directory + * block by increasing its dp->d_reclen. If the first entry of + * a directory block is free, then its dp->d_ino is set to 0. + * Entries other than the first in a directory do not normally have + * dp->d_ino set to 0. + */ +#define DIRBLKSIZ DEV_BSIZE +#undef MAXNAMLEN +#define MAXNAMLEN 255 + +/* Don't call this struct DIRECT because the library defines that + (sometimes) in a possible different way. */ + +struct directory_entry { + u_long d_ino; /* inode number of entry */ + u_short d_reclen; /* length of this record */ + u_char d_type; /* file type, see below */ + u_char d_namlen; /* length of string in d_name */ + char d_name[MAXNAMLEN + 1]; /* name with length <= MAXNAMLEN */ +}; + +/* Return the type from a struct directory_entry, paying attention to whether + this filesystem supports the type extension */ +#define DIRECT_TYPE(dp) (direct_symlink_extension ? (dp)->d_type : DT_UNKNOWN) + +/* Return the namlen from a struct direct, paying attention to whether + this filesystem supports the type extension */ +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define DIRECT_NAMLEN(dp) (*(direct_symlink_extension || swab_disk \ + ? &(dp)->d_namlen \ + : &(dp)->d_type)) +#else +#define DIRECT_NAMLEN(dp) (*(!direct_symlink_extension && swab_disk \ + ? &(dp)->d_type \ + : &(dp)->d_namlen)) +#endif + +/* + * The DIRSIZ macro gives the minimum record length which will hold + * the directory entry. This requires the amount of space in struct direct + * without the d_name field, plus enough space for the name with a terminating + * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + */ +/* In BSD this macro takes a struct direct. Modified by MIB here to + take the namelen (as computed by strlen). 
*/ +#define DIRSIZ(namelen) \ + ((sizeof (struct directory_entry) - (MAXNAMLEN+1)) + (((namelen)+1 + 3) &~ 3)) + +#if 0 /* This is the BSD definition */ +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define DIRSIZ(oldfmt, dp) \ + ((oldfmt) ? \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_type+1 + 3) &~ 3)) : \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))) +#else +#define DIRSIZ(oldfmt, dp) \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) +#endif +#endif /* 0 */ + +#define OLDDIRFMT 1 +#define NEWDIRFMT 0 + +#if 0 /* Not used in GNU */ +/* + * Template for manipulating directories. + * Should use struct direct's, but the name field + * is MAXNAMLEN - 1, and this just won't do. + */ +struct dirtemplate { + u_long dot_ino; + short dot_reclen; + u_char dot_type; + u_char dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_long dotdot_ino; + short dotdot_reclen; + u_char dotdot_type; + u_char dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; + +/* + * This is the old format of directories, sans type element. + */ +struct odirtemplate { + u_long dot_ino; + short dot_reclen; + u_short dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_long dotdot_ino; + short dotdot_reclen; + u_short dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; +#endif /* 0 */ + +#endif /* !_DIR_H_ */ diff --git a/ufs/fs.h b/ufs/fs.h new file mode 100644 index 00000000..a2a3cc9b --- /dev/null +++ b/ufs/fs.h @@ -0,0 +1,509 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.7 (Berkeley) 4/19/94 + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. 
This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For file system fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * The first boot and super blocks are given in absolute disk addresses. + * The byte-offset forms are preferred, as they don't imply a sector size. + */ +#define BBSIZE 8192 +#define SBSIZE 8192 +#define BBOFF ((off_t)(0)) +#define SBOFF ((off_t)(BBOFF + BBSIZE)) +#define BBLOCK ((daddr_t)(0)) +#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) + +/* + * Addresses stored in inodes are capable of addressing fragments + * of `blocks'. File system blocks of at most size MAXBSIZE can + * be optionally broken into 2, 4, or 8 pieces, each of which is + * addressible; these pieces may be DEV_BSIZE, or some multiple of + * a DEV_BSIZE unit. + * + * Large files consist of exclusively large data blocks. To avoid + * undue wasted disk space, the last data block of a small file may be + * allocated as only as many fragments of a large block as are + * necessary. The file system format retains only a single pointer + * to such a fragment, which is a piece of a single large block that + * has been divided. The size of such a fragment is determinable from + * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. + * + * The file system records space availability at the fragment level; + * to determine block availability, aligned fragments are examined. + */ + +/* + * The file system is made out of blocks of at most MAXBSIZE units, with + * smaller units (fragments) only in the last direct block. 
MAXBSIZE + * primarily determines the size of buffers in the buffer pool. It may be + * made larger without any effect on existing file systems; however making + * it smaller may make some file systems unmountable. + */ +#define MAXBSIZE MAXPHYS +#define MAXFRAG 8 + +/* + * MINBSIZE is the smallest allowable block size. + * In order to ensure that it is possible to create files of size + * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. + * MINBSIZE must be big enough to hold a cylinder group block, + * thus changes to (struct cg) must keep its size within MINBSIZE. + * Note that super blocks are always of size SBSIZE, + * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. + */ +#define MINBSIZE 4096 + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + * The limit on the amount of summary information per file system + * is defined by MAXCSBUFS. It is currently parameterized for a + * maximum of two million cylinders. + */ +#define MAXMNTLEN 512 +#define MAXCSBUFS 32 + +/* + * A summary of contiguous blocks of various sizes is maintained + * in each cylinder group. Normally this is set by the initial + * value of fs_maxcontig. To conserve space, a maximum summary size + * is set by FS_MAXCONTIG. + */ +#define FS_MAXCONTIG 16 + +/* + * MINFREE gives the minimum acceptable percentage of file system + * blocks which may be free. If the freelist drops below this level + * only the superuser may continue to allocate blocks. This may + * be set to 0 if no reserve of free blocks is deemed necessary, + * however throughput drops by fifty percent if the file system + * is run at between 95% and 100% full; thus the minimum default + * value of fs_minfree is 5%. However, to get good clustering + * performance, 10% is a better choice. hence we use 10% as our + * default value.
With 10% free space, fragmentation is not a + * problem, so we choose to optimize for time. + */ +#define MINFREE 5 +#define DEFAULTOPT FS_OPTTIME + +/* + * Per cylinder group information; summarized in blocks allocated + * from first cylinder group data blocks. These blocks have to be + * read in from fs_csaddr (size fs_cssize) in addition to the + * super block. + * + * N.B. sizeof(struct csum) must be a power of two in order for + * the ``fs_cs'' macro to work (see below). + */ +struct csum { + long cs_ndir; /* number of directories */ + long cs_nbfree; /* number of free blocks */ + long cs_nifree; /* number of free inodes */ + long cs_nffree; /* number of free frags */ +}; + +/* + * Super block for a file system. + */ +struct fs { + struct fs *fs_link; /* linked list of file systems */ + struct fs *fs_rlink; /* used for incore super blocks */ + daddr_t fs_sblkno; /* addr of super-block in filesys */ + daddr_t fs_cblkno; /* offset of cyl-block in filesys */ + daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ + daddr_t fs_dblkno; /* offset of first data after cg */ + long fs_cgoffset; /* cylinder group offset in cylinder */ + long fs_cgmask; /* used to calc mod fs_ntrak */ + time_t fs_time; /* last time written */ + long fs_size; /* number of blocks in fs */ + long fs_dsize; /* number of data blocks in fs */ + long fs_ncg; /* number of cylinder groups */ + long fs_bsize; /* size of basic blocks in fs */ + long fs_fsize; /* size of frag blocks in fs */ + long fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + long fs_minfree; /* minimum percentage of free blocks */ + long fs_rotdelay; /* num of ms for optimal next block */ + long fs_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + long fs_bmask; /* ``blkoff'' calc of blk offsets */ + long fs_fmask; /* ``fragoff'' calc of frag offsets */ + long fs_bshift; /* ``lblkno'' calc of logical blkno */ + long fs_fshift; /* 
``numfrags'' calc number of frags */ +/* these are configuration parameters */ + long fs_maxcontig; /* max number of contiguous blks */ + long fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + long fs_fragshift; /* block to frag shift */ + long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + long fs_sbsize; /* actual size of super block */ + long fs_csmask; /* csum block offset */ + long fs_csshift; /* csum block number */ + long fs_nindir; /* value of NINDIR */ + long fs_inopb; /* value of INOPB */ + long fs_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + long fs_optim; /* optimization preference, see below */ +/* these fields are derived from the hardware */ + long fs_npsect; /* # sectors/track including spares */ + long fs_interleave; /* hardware sector interleave */ + long fs_trackskew; /* sector 0 skew, per track */ + long fs_headswitch; /* head switch time, usec */ + long fs_trkseek; /* track-to-track seek, usec */ +/* sizes determined by number of cylinder groups and their sizes */ + daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + long fs_cssize; /* size of cyl grp summary area */ + long fs_cgsize; /* cylinder group size */ +/* these fields are derived from the hardware */ + long fs_ntrak; /* tracks per cylinder */ + long fs_nsect; /* sectors per track */ + long fs_spc; /* sectors per cylinder */ +/* this comes from the disk driver partitioning */ + long fs_ncyl; /* cylinders in file system */ +/* these fields can be computed from the others */ + long fs_cpg; /* cylinders per group */ + long fs_ipg; /* inodes per group */ + long fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + char fs_fmod; /* super block modified flag */ + char fs_clean; /* file system is clean flag */ + char fs_ronly; /* mounted read-only flag */ 
+ char fs_flags; /* currently unused flag */ + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +/* these fields retain the current block allocation info */ + long fs_cgrotor; /* last cg searched */ + struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ + long fs_cpc; /* cyl per cycle in postbl */ + short fs_opostbl[16][8]; /* old rotation block list head */ + long fs_sparecon[50]; /* reserved for future constants */ + long fs_contigsumsize; /* size of cluster summary array */ + long fs_maxsymlinklen; /* max length of an internal symlink */ + long fs_inodefmt; /* format of on-disk inodes */ + u_quad_t fs_maxfilesize; /* maximum representable file size */ + quad_t fs_qbmask; /* ~fs_bmask - for use with quad size */ + quad_t fs_qfmask; /* ~fs_fmask - for use with quad size */ + long fs_state; /* validate fs_clean field */ + long fs_postblformat; /* format of positional layout tables */ + long fs_nrpos; /* number of rotational positions */ + long fs_postbloff; /* (short) rotation block list head */ + long fs_rotbloff; /* (u_char) blocks for each rotation */ + long fs_magic; /* magic number */ + u_char fs_space[1]; /* list of blocks for each rotation */ +/* actually longer */ +}; +/* + * Filesystem identification + */ +#define FS_MAGIC 0x011954 /* the fast filesystem magic number */ +#define FS_OKAY 0x7c269d38 /* superblock checksum */ +#define FS_42INODEFMT -1 /* 4.2BSD inode format */ +#define FS_44INODEFMT 2 /* 4.4BSD inode format */ +/* + * Preference for optimization. + */ +#define FS_OPTTIME 0 /* minimize allocation time */ +#define FS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Rotational layout table format types + */ +#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ +#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ +/* + * Macros for access to superblock array structures + */ +#define fs_postbl(fs, cylno) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ?
((fs)->fs_opostbl[cylno]) \ + : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) +#define fs_rotbl(fs) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ? ((fs)->fs_space) \ + : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff))) + +/* + * The size of a cylinder group is calculated by CGSIZE. The maximum size + * is limited by the fact that cylinder groups are at most one block. + * Its size is derived from the size of the maps maintained in the + * cylinder group and the (struct cg) size. + */ +#define CGSIZE(fs) \ + /* base cg */ (sizeof(struct cg) + sizeof(long) + \ + /* blktot size */ (fs)->fs_cpg * sizeof(long) + \ + /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(short) + \ + /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ + /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\ + /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ + /* cluster sum */ (fs)->fs_contigsumsize * sizeof(long) + \ + /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY))) + +#if 0 /* Wrong for GNU Hurd ufs; we don't use fs_csp at all. */ +/* + * Convert cylinder group to base address of its global summary info. + * + * N.B. This macro assumes that sizeof(struct csum) is a power of two. + */ +#define fs_cs(fs, indx) \ + fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] +#else +/* Global variable csum is declared in ufs.h; use it instead + of fs_cs stuff. */ +#define fs_cs(fs, indx) this will generate a syntax error. +#endif + +/* + * Cylinder group block for a file system. 
+ */ +#define CG_MAGIC 0x090255 +struct cg { + struct cg *cg_link; /* linked list of cyl groups */ + long cg_magic; /* magic number */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[MAXFRAG]; /* counts of available frags */ + long cg_btotoff; /* (long) block totals per cylinder */ + long cg_boff; /* (short) free block positions */ + long cg_iusedoff; /* (char) used inode map */ + long cg_freeoff; /* (u_char) free block map */ + long cg_nextfreeoff; /* (u_char) next available space */ + long cg_clustersumoff; /* (long) counts of avail clusters */ + long cg_clusteroff; /* (char) free cluster map */ + long cg_nclusterblks; /* number of clusters this cg */ + long cg_sparecon[13]; /* reserved for future use */ + u_char cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; +/* + * Macros for access to cylinder group array structures + */ +#define cg_blktot(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_btot) \ + : ((long *)((char *)(cgp) + (cgp)->cg_btotoff))) +#define cg_blks(fs, cgp, cylno) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_b[cylno]) \ + : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) +#define cg_inosused(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_iused) \ + : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff))) +#define cg_blksfree(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? 
(((struct ocg *)(cgp))->cg_free) \ + : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff))) +#define cg_chkmagic(cgp) \ + ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) +#define cg_clustersfree(cgp) \ + ((u_char *)((char *)(cgp) + (cgp)->cg_clusteroff)) +#define cg_clustersum(cgp) \ + ((long *)((char *)(cgp) + (cgp)->cg_clustersumoff)) + +/* + * The following structure is defined + * for compatibility with old file systems. + */ +struct ocg { + struct ocg *cg_link; /* linked list of cyl groups */ + struct ocg *cg_rlink; /* used for incore cyl groups */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[8]; /* counts of available frags */ + long cg_btot[32]; /* block totals per cylinder */ + short cg_b[32][8]; /* positions of free blocks */ + char cg_iused[256]; /* used inode map */ + long cg_magic; /* magic number */ + u_char cg_free[1]; /* free block map */ +/* actually longer */ +}; + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc file system addresses of cylinder group data structures. 
+ */ +#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c))) +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgstart(fs, c) \ + (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) + +/* + * Macros for handling inode numbers: + * inode number to file system block offset. + * inode number to cylinder group number. + * inode number to file system block address. + */ +#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) +#define ino_to_fsba(fs, x) \ + ((daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ + (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) +#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) +#define cbtocylno(fs, bno) \ + ((bno) * NSPF(fs) / (fs)->fs_spc) +#define cbtorpos(fs, bno) \ + (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ + (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ + (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->fs_qbmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & (fs)->fs_qfmask) +#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs)->fs_bshift) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the file system. + */ +/* Changed from BSD to use allocsize instead of i_size. */ +#define blksize(fs, np, lbn) \ + (((lbn) >= NDADDR || (np)->allocsize >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (np)->allocsize)))) + +#if 0 /* Don't use this */ +#define dblksize(fs, dip, lbn) \ + (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? 
(fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#endif + +/* + * Number of disk sectors per block; assumes DEV_BSIZE byte sector size. + */ +#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) +#define NSPF(fs) ((fs)->fs_nspf) + +/* + * INOPB is the number of inodes in a secondary storage block. + */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + +extern int inside[], around[]; +extern u_char *fragtbl[]; diff --git a/ufs/hyper.c b/ufs/hyper.c new file mode 100644 index 00000000..ece327a2 --- /dev/null +++ b/ufs/hyper.c @@ -0,0 +1,414 @@ +/* Fetching and storing the hypermetadata (superblock and cg summary info). + Copyright (C) 1994, 95, 96, 97, 98, 1999 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" +#include <string.h> +#include <stdio.h> +#include <error.h> +#include <hurd/store.h> + +static int ufs_clean; /* fs clean before we started writing? 
*/ + +static int oldformat; + +void *zeroblock; + +struct fs *sblock; +struct csum *csum; + +void +swab_sblock (struct fs *sblock) +{ + int i, j; + + sblock->fs_sblkno = swab_long (sblock->fs_sblkno); + sblock->fs_cblkno = swab_long (sblock->fs_cblkno); + sblock->fs_iblkno = swab_long (sblock->fs_iblkno); + sblock->fs_dblkno = swab_long (sblock->fs_dblkno); + sblock->fs_cgoffset = swab_long (sblock->fs_cgoffset); + sblock->fs_cgmask = swab_long (sblock->fs_cgmask); + sblock->fs_time = swab_long (sblock->fs_time); + sblock->fs_size = swab_long (sblock->fs_size); + sblock->fs_dsize = swab_long (sblock->fs_dsize); + sblock->fs_ncg = swab_long (sblock->fs_ncg); + sblock->fs_bsize = swab_long (sblock->fs_bsize); + sblock->fs_fsize = swab_long (sblock->fs_fsize); + sblock->fs_frag = swab_long (sblock->fs_frag); + sblock->fs_minfree = swab_long (sblock->fs_minfree); + sblock->fs_rotdelay = swab_long (sblock->fs_rotdelay); + sblock->fs_rps = swab_long (sblock->fs_rps); + sblock->fs_bmask = swab_long (sblock->fs_bmask); + sblock->fs_fmask = swab_long (sblock->fs_fmask); + sblock->fs_bshift = swab_long (sblock->fs_bshift); + sblock->fs_fshift = swab_long (sblock->fs_fshift); + sblock->fs_maxcontig = swab_long (sblock->fs_maxcontig); + sblock->fs_maxbpg = swab_long (sblock->fs_maxbpg); + sblock->fs_fragshift = swab_long (sblock->fs_fragshift); + sblock->fs_fsbtodb = swab_long (sblock->fs_fsbtodb); + sblock->fs_sbsize = swab_long (sblock->fs_sbsize); + sblock->fs_csmask = swab_long (sblock->fs_csmask); + sblock->fs_csshift = swab_long (sblock->fs_csshift); + sblock->fs_nindir = swab_long (sblock->fs_nindir); + sblock->fs_inopb = swab_long (sblock->fs_inopb); + sblock->fs_nspf = swab_long (sblock->fs_nspf); + sblock->fs_optim = swab_long (sblock->fs_optim); + sblock->fs_npsect = swab_long (sblock->fs_npsect); + sblock->fs_interleave = swab_long (sblock->fs_interleave); + sblock->fs_trackskew = swab_long (sblock->fs_trackskew); + sblock->fs_headswitch = swab_long 
(sblock->fs_headswitch); + sblock->fs_trkseek = swab_long (sblock->fs_trkseek); + sblock->fs_csaddr = swab_long (sblock->fs_csaddr); + sblock->fs_cssize = swab_long (sblock->fs_cssize); + sblock->fs_cgsize = swab_long (sblock->fs_cgsize); + sblock->fs_ntrak = swab_long (sblock->fs_ntrak); + sblock->fs_nsect = swab_long (sblock->fs_nsect); + sblock->fs_spc = swab_long (sblock->fs_spc); + sblock->fs_ncyl = swab_long (sblock->fs_ncyl); + sblock->fs_cpg = swab_long (sblock->fs_cpg); + sblock->fs_ipg = swab_long (sblock->fs_ipg); + sblock->fs_fpg = swab_long (sblock->fs_fpg); + sblock->fs_cstotal.cs_ndir = swab_long (sblock->fs_cstotal.cs_ndir); + sblock->fs_cstotal.cs_nbfree = swab_long (sblock->fs_cstotal.cs_nbfree); + sblock->fs_cstotal.cs_nifree = swab_long (sblock->fs_cstotal.cs_nifree); + sblock->fs_cstotal.cs_nffree = swab_long (sblock->fs_cstotal.cs_nffree); + /* fs_fmod, fs_clean, fs_ronly, fs_flags, fs_fsmnt are all char */ + sblock->fs_cgrotor = swab_long (sblock->fs_cgrotor); + sblock->fs_cpc = swab_long (sblock->fs_cpc); + sblock->fs_contigsumsize = swab_long (sblock->fs_contigsumsize); + sblock->fs_maxsymlinklen = swab_long (sblock->fs_maxsymlinklen); + sblock->fs_inodefmt = swab_long (sblock->fs_inodefmt); + sblock->fs_maxfilesize = swab_long_long (sblock->fs_maxfilesize); + sblock->fs_qbmask = swab_long_long (sblock->fs_qbmask); + sblock->fs_state = swab_long (sblock->fs_state); + sblock->fs_postblformat = swab_long (sblock->fs_postblformat); + sblock->fs_nrpos = swab_long (sblock->fs_nrpos); + sblock->fs_postbloff = swab_long (sblock->fs_postbloff); + sblock->fs_rotbloff = swab_long (sblock->fs_rotbloff); + sblock->fs_magic = swab_long (sblock->fs_magic); + + /* Tables */ + if (sblock->fs_postblformat == FS_42POSTBLFMT) + for (i = 0; i < 16; i++) + for (j = 0; j < 8; j++) + sblock->fs_opostbl[i][j] = swab_short (sblock->fs_opostbl[i][j]); + else + for (i = 0; i < sblock->fs_cpc; i++) + for (j = 0; j < sblock->fs_nrpos; j++) + fs_postbl(sblock, j)[i] + = 
swab_short (fs_postbl (sblock, j)[i]); + + /* The rot table is all chars */ +} + +void +swab_csums (struct csum *csum) +{ + int i; + + for (i = 0; i < sblock->fs_ncg; i++) + { + csum[i].cs_ndir = swab_long (csum[i].cs_ndir); + csum[i].cs_nbfree = swab_long (csum[i].cs_nbfree); + csum[i].cs_nifree = swab_long (csum[i].cs_nifree); + csum[i].cs_nffree = swab_long (csum[i].cs_nffree); + } +} + +void +get_hypermetadata (void) +{ + error_t err; + + if (!sblock) + sblock = malloc (SBSIZE); + + /* Free previous values. */ + if (zeroblock) + munmap ((caddr_t) zeroblock, sblock->fs_bsize); + if (csum) + free (csum); + + err = diskfs_catch_exception (); + assert_perror (err); + bcopy (disk_image + SBOFF, sblock, SBSIZE); + diskfs_end_catch_exception (); + + if ((swab_long (sblock->fs_magic)) == FS_MAGIC) + { + swab_disk = 1; + swab_sblock (sblock); + } + else + swab_disk = 0; + + if (sblock->fs_magic != FS_MAGIC) + { + fprintf (stderr, "Bad magic number %#lx (should be %#x)\n", + sblock->fs_magic, FS_MAGIC); + exit (1); + } + if (sblock->fs_bsize > 8192) + { + fprintf (stderr, "Block size %ld is too big (max is 8192 bytes)\n", + sblock->fs_bsize); + exit (1); + } + if (sblock->fs_bsize < sizeof (struct fs)) + { + fprintf (stderr, "Block size %ld is too small (min is %Zd bytes)\n", + sblock->fs_bsize, sizeof (struct fs)); + exit (1); + } + + if (sblock->fs_maxsymlinklen > (long)MAXSYMLINKLEN) + { + fprintf (stderr, "Max shortcut symlinklen %ld is too big (max is %ld)\n", + sblock->fs_maxsymlinklen, (long)MAXSYMLINKLEN); + exit (1); + } + + assert ((__vm_page_size % DEV_BSIZE) == 0); + assert ((sblock->fs_bsize % DEV_BSIZE) == 0); + assert (__vm_page_size <= sblock->fs_bsize); + + /* Examine the clean bit and force read-only if unclean. */ + ufs_clean = sblock->fs_clean; + if (! ufs_clean) + { + error (0, 0, + "%s: warning: FILESYSTEM NOT UNMOUNTED CLEANLY; PLEASE fsck", + diskfs_disk_name); + if (! 
diskfs_readonly) + { + diskfs_readonly = 1; + error (0, 0, + "%s: MOUNTED READ-ONLY; MUST USE `fsysopts --writable'", + diskfs_disk_name); + } + } + + /* If this is an old filesystem, then we have some more + work to do; some crucial constants might not be set; we + are therefore forced to set them here. */ + + if (sblock->fs_npsect < sblock->fs_nsect) + sblock->fs_npsect = sblock->fs_nsect; + + if (sblock->fs_interleave < 1) + sblock->fs_interleave = 1; + + if (sblock->fs_postblformat == FS_42POSTBLFMT) + sblock->fs_nrpos = 8; + + if (sblock->fs_inodefmt < FS_44INODEFMT) + { + quad_t sizepb = sblock->fs_bsize; + int i; + + oldformat = 1; + sblock->fs_maxfilesize = sblock->fs_bsize * NDADDR - 1; + for (i = 0; i < NIADDR; i++) + { + sizepb *= NINDIR (sblock); + sblock->fs_maxfilesize += sizepb; + } + sblock->fs_qbmask = ~sblock->fs_bmask; + sblock->fs_qfmask = ~sblock->fs_fmask; + } + + /* Find out if we support the 4.4 symlink/dirtype extension */ + if (sblock->fs_maxsymlinklen > 0) + direct_symlink_extension = 1; + else + direct_symlink_extension = 0; + + csum = malloc (fsaddr (sblock, howmany (sblock->fs_cssize, + sblock->fs_fsize))); + + assert (!diskfs_catch_exception ()); + bcopy (disk_image + fsaddr (sblock, sblock->fs_csaddr), + csum, + fsaddr (sblock, howmany (sblock->fs_cssize, sblock->fs_fsize))); + diskfs_end_catch_exception (); + + if (swab_disk) + swab_csums (csum); + + if (store->size < sblock->fs_size * sblock->fs_fsize) + { + fprintf (stderr, + "Disk size (%Ld) less than necessary " + "(superblock says we need %ld)\n", + store->size, sblock->fs_size * sblock->fs_fsize); + exit (1); + } + + zeroblock = mmap (0, sblock->fs_bsize, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0); + + /* If the filesystem has new features in it, don't pay attention to + the user's request not to use them. 
*/ + if ((sblock->fs_inodefmt == FS_44INODEFMT + || direct_symlink_extension) + && compat_mode == COMPAT_BSD42) + { + compat_mode = COMPAT_BSD44; + error (0, 0, + "4.2 compat mode requested on 4.4 fs--switched to 4.4 mode"); + } +} + +/* Write the csum data. This isn't backed by a pager because it is + taken from ordinary data blocks and might not be an even number + of pages; in that case writing it through the pager would nuke whatever + pages came after it on the disk and were backed by file pagers. */ +error_t +diskfs_set_hypermetadata (int wait, int clean) +{ + error_t err; + + spin_lock (&alloclock); + + if (csum_dirty) + { + /* Copy into a page-aligned buffer to avoid bugs in kernel device + code. */ + void *buf = 0; + size_t read = 0; + size_t bufsize = round_page (fragroundup (sblock, sblock->fs_cssize)); + + err = store_read (store, + fsbtodb (sblock, sblock->fs_csaddr) + << log2_dev_blocks_per_dev_bsize, + bufsize, &buf, &read); + if (err) + return err; + else if (read != bufsize) + err = EIO; + else + { + size_t wrote; + bcopy (csum, buf, sblock->fs_cssize); + if (swab_disk) + swab_csums ((struct csum *)buf); + err = store_write (store, + fsbtodb (sblock, sblock->fs_csaddr) + << log2_dev_blocks_per_dev_bsize, + buf, bufsize, &wrote); + if (!err && wrote != bufsize) + err = EIO; + } + + munmap (buf, read); + + if (err) + { + spin_unlock (&alloclock); + return err; + } + + csum_dirty = 0; + } + + if (clean && ufs_clean && !sblock->fs_clean) + { + /* The filesystem is clean, so set the clean flag. */ + sblock->fs_clean = 1; + sblock_dirty = 1; + } + else if (!clean && sblock->fs_clean) + { + /* Clear the clean flag */ + sblock->fs_clean = 0; + sblock_dirty = 1; + wait = 1; /* must be synchronous */ + } + + spin_unlock (&alloclock); + + /* Update the superblock if necessary (clean bit was just set). 
*/ + copy_sblock (); + + sync_disk (wait); + return 0; +} + +/* Copy the sblock into the disk */ +void +copy_sblock () +{ + error_t err; + + err = diskfs_catch_exception (); + assert_perror (err); + + spin_lock (&alloclock); + + if (sblock_dirty) + { + assert (! diskfs_readonly); + + if (sblock->fs_postblformat == FS_42POSTBLFMT + || oldformat + || swab_disk) + { + char sblockcopy[SBSIZE]; + struct fs *sbcopy = (struct fs *)sblockcopy; + bcopy (sblock, sblockcopy, SBSIZE); + if (sblock->fs_postblformat == FS_42POSTBLFMT) + sbcopy->fs_nrpos = -1; + if (oldformat) + { + sbcopy->fs_maxfilesize = -1; + sbcopy->fs_qbmask = -1; + sbcopy->fs_qfmask = -1; + } + if (swab_disk) + swab_sblock (sbcopy); + bcopy (sbcopy, disk_image + SBOFF, SBSIZE); + } + else + bcopy (sblock, disk_image + SBOFF, SBSIZE); + record_poke (disk_image + SBOFF, SBSIZE); + sblock_dirty = 0; + } + + spin_unlock (&alloclock); + + diskfs_end_catch_exception (); +} + +void +diskfs_readonly_changed (int readonly) +{ + (*(readonly ? store_set_flags : store_clear_flags)) (store, STORE_READONLY); + + mprotect (disk_image, store->size, PROT_READ | (readonly ? 0 : PROT_WRITE)); + + if (readonly) + { + /* We know we are sync'd now. The superblock is marked as dirty + because we cleared the clean flag immediately after sync'ing. + But now we want to leave it marked clean and not touch it further. */ + sblock_dirty = 0; + return; + } + + strcpy (sblock->fs_fsmnt, "Hurd /"); /* XXX */ + + if (!sblock->fs_clean) + error (0, 0, "WARNING: UNCLEANED FILESYSTEM NOW WRITABLE"); +} diff --git a/ufs/inode.c b/ufs/inode.c new file mode 100644 index 00000000..1a8a7098 --- /dev/null +++ b/ufs/inode.c @@ -0,0 +1,703 @@ +/* Inode management routines + + Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2007 + Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <netinet/in.h> +#include <fcntl.h> +#include <hurd/store.h> + +#define INOHSZ 512 +#if ((INOHSZ&(INOHSZ-1)) == 0) +#define INOHASH(ino) ((ino)&(INOHSZ-1)) +#else +#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) +#endif + +static struct node *nodehash[INOHSZ]; +static error_t read_disknode (struct node *np); + +spin_lock_t gennumberlock = SPIN_LOCK_INITIALIZER; + +/* Initialize the inode hash table. */ +void +inode_init () +{ + int n; + for (n = 0; n < INOHSZ; n++) + nodehash[n] = 0; +} + +/* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. 
*/ +error_t +diskfs_cached_lookup (ino_t inum, struct node **npp) +{ + struct disknode *dn; + struct node *np; + error_t err; + + spin_lock (&diskfs_node_refcnt_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + { + if (np->dn->number != inum) + continue; + + np->references++; + spin_unlock (&diskfs_node_refcnt_lock); + mutex_lock (&np->lock); + *npp = np; + return 0; + } + + dn = malloc (sizeof (struct disknode)); + + dn->number = inum; + dn->dirents = 0; + dn->dir_idx = 0; + + rwlock_init (&dn->allocptrlock); + dn->dirty = 0; + dn->fileinfo = 0; + + np = diskfs_make_node (dn); + np->cache_id = inum; + + mutex_lock (&np->lock); + dn->hnext = nodehash[INOHASH(inum)]; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; + spin_unlock (&diskfs_node_refcnt_lock); + + err = read_disknode (np); + + if (!diskfs_check_readonly () && !np->dn_stat.st_gen) + { + spin_lock (&gennumberlock); + if (++nextgennumber < diskfs_mtime->seconds) + nextgennumber = diskfs_mtime->seconds; + np->dn_stat.st_gen = nextgennumber; + spin_unlock (&gennumberlock); + np->dn_set_ctime = 1; + } + + if (err) + return err; + else + { + *npp = np; + return 0; + } +} + +/* Lookup node INUM (which must have a reference already) and return it + without allocating any new references. */ +struct node * +ifind (ino_t inum) +{ + struct node *np; + + spin_lock (&diskfs_node_refcnt_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + { + if (np->dn->number != inum) + continue; + + assert (np->references); + spin_unlock (&diskfs_node_refcnt_lock); + return np; + } + assert (0); +} + +/* The last reference to a node has gone away; drop + it from the hash table and clean all state in the dn structure. 
*/ +void +diskfs_node_norefs (struct node *np) +{ + *np->dn->hprevp = np->dn->hnext; + if (np->dn->hnext) + np->dn->hnext->dn->hprevp = np->dn->hprevp; + if (np->dn->dirents) + free (np->dn->dirents); + assert (!np->dn->fileinfo); + free (np->dn); + free (np); +} + +/* The last hard reference to a node has gone away; arrange to have + all the weak references dropped that can be. */ +void +diskfs_try_dropping_softrefs (struct node *np) +{ + drop_pager_softrefs (np); +} + +/* The last hard reference to a node has gone away. */ +void +diskfs_lost_hardrefs (struct node *np) +{ +#ifdef notanymore + struct port_info *pi; + struct pager *p; + + /* Check and see if there is a pager which has only + one reference (ours). If so, then drop that reference, + breaking the cycle. The complexity in this routine + is all due to this cycle. */ + + if (np->dn->fileinfo) + { + spin_lock (&_libports_portrefcntlock); + pi = (struct port_info *) np->dn->fileinfo->p; + if (pi->refcnt == 1) + { + + /* The only way to get a new reference to the pager + in this state is to call diskfs_get_filemap; this + can't happen as long as we hold NP locked. So + we can safely unlock _libports_portrefcntlock for + the following call. */ + spin_unlock (&_libports_portrefcntlock); + + /* Right now the node is locked with no hard refs; + this is an anomolous situation. Before messing with + the reference count on the file pager, we have to + give ourselves a reference back so that we are really + allowed to hold the lock. Then we can do the + unreference. */ + p = np->dn->fileinfo->p; + np->dn->fileinfo = 0; + diskfs_nref (np); + pager_unreference (p); + + assert (np->references == 1 && np->light_references == 0); + + /* This will do the real deallocate. Whew. */ + diskfs_nput (np); + } + else + spin_unlock (&_libports_portrefcntlock); + } +#endif +} + +/* A new hard reference to a node has been created; it's now OK to + have unused weak references. 
*/ +void +diskfs_new_hardrefs (struct node *np) +{ + allow_pager_softrefs (np); +} + +/* Read stat information out of the dinode. */ +static error_t +read_disknode (struct node *np) +{ + struct stat *st = &np->dn_stat; + struct dinode *di = dino (np->dn->number); + error_t err; + + err = diskfs_catch_exception (); + if (err) + return err; + + st->st_fstype = FSTYPE_UFS; + st->st_fsid = getpid (); /* This call is very cheap. */ + st->st_ino = np->dn->number; + st->st_gen = read_disk_entry (di->di_gen); + st->st_rdev = read_disk_entry(di->di_rdev); + st->st_mode = (((read_disk_entry (di->di_model) + | (read_disk_entry (di->di_modeh) << 16)) + & ~S_ITRANS) + | (di->di_trans ? S_IPTRANS : 0)); + st->st_nlink = read_disk_entry (di->di_nlink); + st->st_size = read_disk_entry (di->di_size); + st->st_atim.tv_sec = read_disk_entry (di->di_atime.tv_sec); + st->st_atim.tv_nsec = read_disk_entry (di->di_atime.tv_nsec); + st->st_mtim.tv_sec = read_disk_entry (di->di_mtime.tv_sec); + st->st_mtim.tv_nsec = read_disk_entry (di->di_mtime.tv_nsec); + st->st_ctim.tv_sec = read_disk_entry (di->di_ctime.tv_sec); + st->st_ctim.tv_nsec = read_disk_entry (di->di_ctime.tv_nsec); + st->st_blksize = sblock->fs_bsize; + st->st_blocks = read_disk_entry (di->di_blocks); + st->st_flags = read_disk_entry (di->di_flags); + + if (sblock->fs_inodefmt < FS_44INODEFMT) + { + st->st_uid = read_disk_entry (di->di_ouid); + st->st_gid = read_disk_entry (di->di_ogid); + st->st_author = st->st_uid; + np->author_tracks_uid = 1; + } + else + { + st->st_uid = read_disk_entry (di->di_uid); + st->st_gid = read_disk_entry (di->di_gid); + st->st_author = read_disk_entry (di->di_author); + if (st->st_author == -1) + st->st_author = st->st_uid; + } + + diskfs_end_catch_exception (); + if (!S_ISBLK (st->st_mode) && !S_ISCHR (st->st_mode)) + st->st_rdev = 0; + + if (S_ISLNK (st->st_mode) + && direct_symlink_extension + && st->st_size < sblock->fs_maxsymlinklen) + np->allocsize = 0; + else + { + if (lblkno (sblock, 
np->dn_stat.st_size) < NDADDR) + np->allocsize = fragroundup (sblock, st->st_size); + else + np->allocsize = blkroundup (sblock, st->st_size); + } + + return 0; +} + +error_t diskfs_node_reload (struct node *node) +{ + if (node->dn->dirents) + { + free (node->dn->dirents); + node->dn->dirents = 0; + } + flush_node_pager (node); + read_disknode (node); + return 0; +} + +/* Return 0 if NP's author can be changed to AUTHOR; otherwise return an + error code. */ +error_t +diskfs_validate_author_change (struct node *np, uid_t author) +{ + if (compat_mode == COMPAT_GNU) + return 0; + else + /* For non-hurd filesystems, the author & owner are the same. */ + return (author == np->dn_stat.st_uid) ? 0 : EINVAL; +} + +static void +write_node (struct node *np) +{ + struct stat *st = &np->dn_stat; + struct dinode *di = dino (np->dn->number); + error_t err; + + if (np->dn_stat_dirty) + { + assert (!diskfs_readonly); + + err = diskfs_catch_exception (); + if (err) + return; + + write_disk_entry (di->di_gen, st->st_gen); + + if (S_ISBLK (st->st_mode) || S_ISCHR (st->st_mode)) + write_disk_entry (di->di_rdev, st->st_rdev); + + /* We happen to know that the stat mode bits are the same + as the ufs mode bits. 
*/ + + if (compat_mode == COMPAT_GNU) + { + mode_t mode = st->st_mode & ~S_ITRANS; + write_disk_entry (di->di_model, mode & 0xffff); + write_disk_entry (di->di_modeh, (mode >> 16) & 0xffff); + } + else + { + write_disk_entry (di->di_model, st->st_mode & 0xffff & ~S_ITRANS); + di->di_modeh = 0; + } + + if (compat_mode != COMPAT_BSD42) + { + write_disk_entry (di->di_uid, st->st_uid); + write_disk_entry (di->di_gid, st->st_gid); + } + + if (sblock->fs_inodefmt < FS_44INODEFMT) + { + write_disk_entry (di->di_ouid, st->st_uid & 0xffff); + write_disk_entry (di->di_ogid, st->st_gid & 0xffff); + } + else if (compat_mode == COMPAT_GNU) + write_disk_entry (di->di_author, st->st_author); + + write_disk_entry (di->di_nlink, st->st_nlink); + write_disk_entry (di->di_size, st->st_size); + write_disk_entry (di->di_atime.tv_sec, st->st_atim.tv_sec); + write_disk_entry (di->di_atime.tv_nsec, st->st_atim.tv_nsec); + write_disk_entry (di->di_mtime.tv_sec, st->st_mtim.tv_sec); + write_disk_entry (di->di_mtime.tv_nsec, st->st_mtim.tv_nsec); + write_disk_entry (di->di_ctime.tv_sec, st->st_ctim.tv_sec); + write_disk_entry (di->di_ctime.tv_nsec, st->st_ctim.tv_nsec); + write_disk_entry (di->di_blocks, st->st_blocks); + write_disk_entry (di->di_flags, st->st_flags); + + diskfs_end_catch_exception (); + np->dn_stat_dirty = 0; + record_poke (di, sizeof (struct dinode)); + } +} + +/* See if we should create a symlink by writing it directly into + the block pointer array. Returning EINVAL tells diskfs to do it + the usual way. 
*/ +static error_t +create_symlink_hook (struct node *np, const char *target) +{ + int len = strlen (target); + error_t err; + struct dinode *di; + + if (!direct_symlink_extension) + return EINVAL; + + assert (compat_mode != COMPAT_BSD42); + + if (len >= sblock->fs_maxsymlinklen) + return EINVAL; + + err = diskfs_catch_exception (); + if (err) + return err; + + di = dino (np->dn->number); + bcopy (target, di->di_shortlink, len); + np->dn_stat.st_size = len; + np->dn_set_ctime = 1; + np->dn_set_mtime = 1; + record_poke (di, sizeof (struct dinode)); + + diskfs_end_catch_exception (); + return 0; +} +error_t (*diskfs_create_symlink_hook)(struct node *, const char *) + = create_symlink_hook; + +/* Check if this symlink is stored directly in the block pointer array. + Returning EINVAL tells diskfs to do it the usual way. */ +static error_t +read_symlink_hook (struct node *np, + char *buf) +{ + error_t err; + + if (!direct_symlink_extension + || np->dn_stat.st_size >= sblock->fs_maxsymlinklen) + return EINVAL; + + err = diskfs_catch_exception (); + if (err) + return err; + + bcopy ((dino (np->dn->number))->di_shortlink, buf, np->dn_stat.st_size); + + diskfs_set_node_atime (np); + + diskfs_end_catch_exception (); + return 0; +} +error_t (*diskfs_read_symlink_hook)(struct node *, char *) + = read_symlink_hook; + +error_t +diskfs_node_iterate (error_t (*fun)(struct node *)) +{ + struct node *np; + struct item {struct item *next; struct node *np;} *list = 0; + struct item *i; + error_t err; + int n; + + /* Acquire a reference on all the nodes in the hash table + and enter them into a list on the stack. 
*/ + spin_lock (&diskfs_node_refcnt_lock); + for (n = 0; n < INOHSZ; n++) + for (np = nodehash[n]; np; np = np->dn->hnext) + { + np->references++; + i = alloca (sizeof (struct item)); + i->next = list; + i->np = np; + list = i; + } + spin_unlock (&diskfs_node_refcnt_lock); + + err = 0; + for (i = list; i; i = i->next) + { + if (!err) + { + mutex_lock (&i->np->lock); + err = (*fun)(i->np); + mutex_unlock (&i->np->lock); + } + diskfs_nrele (i->np); + } + return err; +} + +/* Write all active disknodes into the dinode pager. */ +void +write_all_disknodes () +{ + error_t + helper (struct node *np) + { + diskfs_set_node_times (np); + write_node (np); + return 0; + } + + diskfs_node_iterate (helper); +} + +void +diskfs_write_disknode (struct node *np, int wait) +{ + write_node (np); + if (wait) + sync_dinode (np->dn->number, 1); +} + +/* Implement the diskfs_set_statfs callback from the diskfs library; + see <hurd/diskfs.h> for the interface description. */ +error_t +diskfs_set_statfs (struct statfs *st) +{ + st->f_type = FSTYPE_UFS; + st->f_bsize = sblock->fs_fsize; + st->f_blocks = sblock->fs_dsize; + st->f_bfree = (sblock->fs_cstotal.cs_nbfree * sblock->fs_frag + + sblock->fs_cstotal.cs_nffree); + st->f_bavail = ((sblock->fs_dsize * (100 - sblock->fs_minfree) / 100) + - (sblock->fs_dsize - st->f_bfree)); + if (st->f_bfree < ((sblock->fs_dsize * (100 - sblock->fs_minfree) / 100))) + st->f_bavail = 0; + st->f_files = sblock->fs_ncg * sblock->fs_ipg - 2; /* not 0 or 1 */ + st->f_ffree = sblock->fs_cstotal.cs_nifree; + st->f_fsid = getpid (); + st->f_namelen = 0; + st->f_favail = st->f_ffree; + st->f_frsize = sblock->fs_fsize; + return 0; +} + +/* Implement the diskfs_set_translator callback from the diskfs + library; see <hurd/diskfs.h> for the interface description. 
*/
/* Store the NAMELEN bytes at NAME as the passive translator of NP, or
   remove the translator when NAMELEN is zero.  The record written to
   the translator block is a u_int length followed by the name bytes;
   the rest of the block is zero-filled.

   Returns EOPNOTSUPP outside GNU compat mode, ENAMETOOLONG if the
   record does not fit in one filesystem block, or the error from
   diskfs_catch_exception / ffs_alloc.  */
error_t
diskfs_set_translator (struct node *np, const char *name, u_int namelen,
		       struct protid *cred)
{
  daddr_t blkno;
  error_t err;
  char buf[sblock->fs_bsize];
  struct dinode *di;

  if (compat_mode != COMPAT_GNU)
    return EOPNOTSUPP;

  /* Compare by subtraction rather than `namelen + sizeof (u_int)':
     the sum can wrap for a huge NAMELEN and let an oversized name
     through.  This is the same form diskfs_get_translator uses.  */
  if (namelen > sblock->fs_bsize - sizeof (u_int))
    return ENAMETOOLONG;

  err = diskfs_catch_exception ();
  if (err)
    return err;

  di = dino (np->dn->number);
  blkno = read_disk_entry (di->di_trans);

  if (namelen && !blkno)
    {
      /* Allocate block for translator */
      err = ffs_alloc (np, 0, 0, sblock->fs_bsize, &blkno, cred);
      if (err)
	{
	  diskfs_end_catch_exception ();
	  return err;
	}
      write_disk_entry (di->di_trans, blkno);
      record_poke (di, sizeof (struct dinode));
      np->dn_set_ctime = 1;
    }
  else if (!namelen && blkno)
    {
      /* Clear block for translator going away. */
      ffs_blkfree (np, blkno, sblock->fs_bsize);
      di->di_trans = 0;
      record_poke (di, sizeof (struct dinode));
      np->dn_stat.st_blocks -= btodb (sblock->fs_bsize);
      np->dn_stat.st_mode &= ~S_IPTRANS;
      np->dn_set_ctime = 1;
    }

  if (namelen)
    {
      /* The whole block is copied to disk below, so clear the staging
	 buffer first; otherwise whatever was on the stack past the
	 name would be written out with it.  */
      memset (buf, 0, sblock->fs_bsize);
      bcopy (&namelen, buf, sizeof (u_int));
      bcopy (name, buf + sizeof (u_int), namelen);

      bcopy (buf, disk_image + fsaddr (sblock, blkno), sblock->fs_bsize);
      sync_disk_blocks (blkno, sblock->fs_bsize, 1);

      np->dn_stat.st_mode |= S_IPTRANS;
      np->dn_set_ctime = 1;
    }

  diskfs_end_catch_exception ();
  return err;
}

/* Implement the diskfs_get_translator callback from the diskfs library.
   See <hurd/diskfs.h> for the interface description. 
*/ +error_t +diskfs_get_translator (struct node *np, char **namep, u_int *namelen) +{ + error_t err; + daddr_t blkno; + u_int datalen; + const void *transloc; + + err = diskfs_catch_exception (); + if (err) + return err; + + blkno = read_disk_entry ((dino (np->dn->number))->di_trans); + assert (blkno); + transloc = disk_image + fsaddr (sblock, blkno); + + datalen = *(u_int *)transloc; + if (datalen > sblock->fs_bsize - sizeof (u_int)) + err = EFTYPE; + else + { + *namep = malloc (datalen); + if (*namep == NULL) + err = ENOMEM; + memcpy (*namep, transloc + sizeof (u_int), datalen); + } + + diskfs_end_catch_exception (); + + *namelen = datalen; + return 0; +} + +/* Called when all hard ports have gone away. */ +void +diskfs_shutdown_soft_ports () +{ + /* Should initiate termination of internally held pager ports + (the only things that should be soft) XXX */ +} + +/* Return a description of the storage of the file. */ +/* In STORAGE_DATA are the following, in network byte order: + + Inode number (4 bytes) + disk address of transator spec (4 bytes) + disk address of inode structure (4 bytes) + offset into inode block holding inode (4 bytes) */ +error_t +diskfs_S_file_get_storage_info (struct protid *cred, + mach_port_t **ports, + mach_msg_type_name_t *ports_type, + mach_msg_type_number_t *num_ports, + int **ints, mach_msg_type_number_t *num_ints, + off_t **offsets, + mach_msg_type_number_t *num_offsets, + char **data, mach_msg_type_number_t *data_len) +{ + error_t err; + struct node *np; + struct store *file_store; + struct store_run runs[NDADDR]; + size_t num_runs = 0; + + if (! cred) + return EOPNOTSUPP; + + np = cred->po->np; + mutex_lock (&np->lock); + + /* See if this file fits in the direct block pointers. If not, punt + for now. (Reading indir blocks is a pain, and I'm postponing + pain.) XXX */ + if (np->allocsize > NDADDR * sblock->fs_bsize) + { + mutex_unlock (&np->lock); + return EINVAL; + } + + err = diskfs_catch_exception (); + if (! 
err) + if (!direct_symlink_extension + || np->dn_stat.st_size >= sblock->fs_maxsymlinklen + || !S_ISLNK (np->dn_stat.st_mode)) + /* Copy the block pointers */ + { + int i; + struct store_run *run = runs; + struct dinode *di = dino (np->dn->number); + + for (i = 0; i < NDADDR; i++) + { + store_offset_t start = fsbtodb (sblock, read_disk_entry (di->di_db[i])); + store_offset_t length = + (((i + 1) * sblock->fs_bsize > np->allocsize) + ? np->allocsize - i * sblock->fs_bsize + : sblock->fs_bsize); + start <<= log2_dev_blocks_per_dev_bsize; + length <<= log2_dev_blocks_per_dev_bsize; + if (num_runs == 0 || run->start + run->length != start) + *run++ = (struct store_run){ start, length }; + else + run->length += length; + } + } + diskfs_end_catch_exception (); + + mutex_unlock (&np->lock); + + if (! err) + err = store_clone (store, &file_store); + if (! err) + { + err = store_remap (file_store, runs, num_runs, &file_store); + if (! err) + err = store_return (file_store, ports, num_ports, ints, num_ints, + offsets, num_offsets, data, data_len); + store_free (file_store); + } + *ports_type = MACH_MSG_TYPE_COPY_SEND; + + return err; +} diff --git a/ufs/main.c b/ufs/main.c new file mode 100644 index 00000000..242107f4 --- /dev/null +++ b/ufs/main.c @@ -0,0 +1,210 @@ +/* + Copyright (C) 1994,95,96,97,98,99,2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +#include "ufs.h" +#include <stdarg.h> +#include <stdio.h> +#include <error.h> +#include <device/device.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <argz.h> +#include <argp.h> +#include <hurd/store.h> + +struct node *diskfs_root_node; + +struct store *store = 0; +struct store_parsed *store_parsed = 0; + +char *diskfs_disk_name = 0; + +/* Number of device blocks per DEV_BSIZE block. */ +unsigned log2_dev_blocks_per_dev_bsize = 0; + +/* Set diskfs_root_node to the root inode. */ +static void +warp_root (void) +{ + error_t err; + err = diskfs_cached_lookup (2, &diskfs_root_node); + assert (!err); + mutex_unlock (&diskfs_root_node->lock); +} + +/* XXX */ +struct mutex printf_lock = MUTEX_INITIALIZER; +int printf (const char *fmt, ...) +{ + va_list arg; + int done; + va_start (arg, fmt); + mutex_lock (&printf_lock); + done = vprintf (fmt, arg); + mutex_unlock (&printf_lock); + va_end (arg); + return done; +} + +int diskfs_readonly; + +/* Ufs-specific options. XXX this should be moved so it can be done at + runtime as well as startup. */ +static const struct argp_option +options[] = +{ + {"compat", 'C', "FMT", 0, + "FMT may be GNU, 4.4, or 4.2, and determines which filesystem extensions" + " are written onto the disk (default is GNU)"}, + {0} +}; + +/* Parse a ufs-specific command line option. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + enum compat_mode mode; + + case 'C': + if (strcasecmp (arg, "gnu") == 0) + mode = COMPAT_GNU; + else if (strcmp (arg, "4.4") == 0) + mode = COMPAT_BSD44; + else if (strcmp (arg, "4.2") == 0) + { + if (sblock + && (sblock->fs_inodefmt == FS_44INODEFMT + || direct_symlink_extension)) + { + argp_failure (state, 0, 0, + "4.2 compat mode requested on 4.4 fs"); + return EINVAL; + } + mode = COMPAT_BSD42; + } + else + { + argp_error (state, "%s: Unknown compatibility mode", arg); + return EINVAL; + } + + state->hook = (void *)mode; /* Save it for the end. */ + break; + + case ARGP_KEY_INIT: + state->child_inputs[0] = state->input; + state->hook = (void *)compat_mode; break; + case ARGP_KEY_SUCCESS: + compat_mode = (enum compat_mode)state->hook; break; + + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* Add our startup arguments to the standard diskfs set. */ +static const struct argp_child startup_children[] = + {{&diskfs_store_startup_argp}, {0}}; +static struct argp startup_argp = {options, parse_opt, 0, 0, startup_children}; + +/* Similarly at runtime. */ +static const struct argp_child runtime_children[] = + {{&diskfs_std_runtime_argp}, {0}}; +static struct argp runtime_argp = {options, parse_opt, 0, 0, runtime_children}; + +struct argp *diskfs_runtime_argp = (struct argp *)&runtime_argp; + +/* Override the standard diskfs routine so we can add our own output. */ +error_t +diskfs_append_args (char **argz, size_t *argz_len) +{ + error_t err; + + /* Get the standard things. */ + err = diskfs_append_std_options (argz, argz_len); + + if (!err && compat_mode != COMPAT_GNU) + err = argz_add (argz, argz_len, + ((compat_mode == COMPAT_BSD42) + ? "--compat=4.2" + : "--compat=4.4")); + + if (! 
err) + err = store_parsed_append_args (store_parsed, argz, argz_len); + + return err; +} + +int +main (int argc, char **argv) +{ + mach_port_t bootstrap; + + /* Initialize the diskfs library, parse arguments, and open the store. + This starts the first diskfs thread for us. */ + store = diskfs_init_main (&startup_argp, argc, argv, + &store_parsed, &bootstrap); + + if (store->block_size > DEV_BSIZE) + error (4, 0, "%s: Bad device block size %zd (should be <= %d)", + diskfs_disk_name, store->block_size, DEV_BSIZE); + if (store->size < SBSIZE + SBOFF) + error (5, 0, "%s: Disk too small (%Ld bytes)", diskfs_disk_name, + store->size); + + log2_dev_blocks_per_dev_bsize = 0; + while ((1 << log2_dev_blocks_per_dev_bsize) < DEV_BSIZE) + log2_dev_blocks_per_dev_bsize++; + log2_dev_blocks_per_dev_bsize -= store->log2_block_size; + + /* Map the entire disk. */ + create_disk_pager (); + + get_hypermetadata (); + + inode_init (); + + /* Find our root node. */ + warp_root (); + + /* Now that we are all set up to handle requests, and diskfs_root_node is + set properly, it is safe to export our fsys control port to the + outside world. */ + diskfs_startup_diskfs (bootstrap, 0); + + /* SET HOST NAME */ + + /* And this thread is done with its work. */ + cthread_exit (0); + + return 0; +} + +error_t +diskfs_reload_global_state () +{ + flush_pokes (); + pager_flush (diskfs_disk_pager, 1); + get_hypermetadata (); + return 0; +} diff --git a/ufs/pager.c b/ufs/pager.c new file mode 100644 index 00000000..3038932d --- /dev/null +++ b/ufs/pager.c @@ -0,0 +1,806 @@ +/* Pager for ufs + Copyright (C) 1994, 1995, 1996, 1997, 1999 Free Software Foundation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" +#include <strings.h> +#include <stdio.h> +#include <unistd.h> +#include <hurd/store.h> + +spin_lock_t node2pagelock = SPIN_LOCK_INITIALIZER; + +spin_lock_t unlocked_pagein_lock = SPIN_LOCK_INITIALIZER; + +#ifdef DONT_CACHE_MEMORY_OBJECTS +#define MAY_CACHE 0 +#else +#define MAY_CACHE 1 +#endif + +struct port_bucket *pager_bucket; + +/* Mapped image of the disk. */ +void *disk_image; + +/* Find the location on disk of page OFFSET in pager UPI. Return the + disk address (in disk block) in *ADDR. If *NPLOCK is set on + return, then release that mutex after I/O on the data has + completed. Set DISKSIZE to be the amount of valid data on disk. + (If this is an unallocated block, then set *ADDR to zero.) + ISREAD is non-zero iff this is for a pagein. */ +static error_t +find_address (struct user_pager_info *upi, + vm_address_t offset, + daddr_t *addr, + int *disksize, + struct rwlock **nplock, + int isread) +{ + error_t err; + struct rwlock *lock; + + assert (upi->type == DISK || upi->type == FILE_DATA); + + if (upi->type == DISK) + { + *disksize = __vm_page_size; + *addr = offset / DEV_BSIZE; + *nplock = 0; + return 0; + } + else + { + struct iblock_spec indirs[NIADDR + 1]; + struct node *np; + + np = upi->np; + + if (isread) + { + try_again: + + /* If we should allow an unlocked pagein, do so. (This + still has a slight race; there could be a pageout in progress + which is blocked on NP->np->allocptrlock itself. 
In that + case the pagein that should proceed unimpeded is blocked + in the pager library waiting for the pageout to complete. + I think this is sufficiently rare to put it off for the time + being.) */ + + spin_lock (&unlocked_pagein_lock); + if (offset >= upi->allow_unlocked_pagein + && (offset + vm_page_size + <= upi->allow_unlocked_pagein + upi->unlocked_pagein_length)) + { + spin_unlock (&unlocked_pagein_lock); + *nplock = 0; + goto have_lock; + } + spin_unlock (&unlocked_pagein_lock); + + /* Block on the rwlock if necessary; but when we wake up, + don't acquire it; check again from the top. + This is mutated inline from rwlock.h. */ + lock = &np->dn->allocptrlock; + mutex_lock (&lock->master); + if (lock->readers == -1 || lock->writers_waiting) + { + lock->readers_waiting++; + condition_wait (&lock->wakeup, &lock->master); + lock->readers_waiting--; + mutex_unlock (&lock->master); + goto try_again; + } + lock->readers++; + mutex_unlock (&lock->master); + *nplock = lock; + } + else + { + rwlock_reader_lock (&np->dn->allocptrlock); + *nplock = &np->dn->allocptrlock; + } + + have_lock: + + if (offset >= np->allocsize) + { + if (*nplock) + rwlock_reader_unlock (*nplock); + if (isread) + return EIO; + else + { + *addr = 0; + *disksize = 0; + return 0; + } + } + + if (offset + __vm_page_size > np->allocsize) + *disksize = np->allocsize - offset; + else + *disksize = __vm_page_size; + + err = fetch_indir_spec (np, lblkno (sblock, offset), indirs); + if (err && *nplock) + rwlock_reader_unlock (*nplock); + else + { + if (indirs[0].bno) + *addr = (fsbtodb (sblock, indirs[0].bno) + + blkoff (sblock, offset) / DEV_BSIZE); + else + *addr = 0; + } + + return err; + } +} + + +/* Implement the pager_read_page callback from the pager library. See + <hurd/pager.h> for the interface description. 
*/ +error_t +pager_read_page (struct user_pager_info *pager, + vm_offset_t page, + vm_address_t *buf, + int *writelock) +{ + error_t err; + struct rwlock *nplock; + daddr_t addr; + int disksize; + + err = find_address (pager, page, &addr, &disksize, &nplock, 1); + if (err) + return err; + + if (addr) + { + size_t read = 0; + err = store_read (store, addr << log2_dev_blocks_per_dev_bsize, + disksize, (void **)buf, &read); + if (read != disksize) + err = EIO; + if (!err && disksize != __vm_page_size) + bzero ((void *)(*buf + disksize), __vm_page_size - disksize); + *writelock = 0; + } + else + { +#if 0 + printf ("Write-locked pagein Object %#x\tOffset %#x\n", pager, page); + fflush (stdout); +#endif + *buf = (vm_address_t) mmap (0, vm_page_size, PROT_READ|PROT_WRITE, + MAP_ANON, 0, 0); + *writelock = 1; + } + + if (nplock) + rwlock_reader_unlock (nplock); + + return err; +} + +/* Implement the pager_write_page callback from the pager library. See + <hurd/pager.h> for the interface description. */ +error_t +pager_write_page (struct user_pager_info *pager, + vm_offset_t page, + vm_address_t buf) +{ + daddr_t addr; + int disksize; + struct rwlock *nplock; + error_t err; + + err = find_address (pager, page, &addr, &disksize, &nplock, 0); + if (err) + return err; + + if (addr) + { + size_t wrote; + err = store_write (store, addr << log2_dev_blocks_per_dev_bsize, + (void *)buf, disksize, &wrote); + if (wrote != disksize) + err = EIO; + } + else + err = 0; + + if (nplock) + rwlock_reader_unlock (nplock); + + return err; +} + +/* Implement the pager_unlock_page callback from the pager library. See + <hurd/pager.h> for the interface description. */ +error_t +pager_unlock_page (struct user_pager_info *pager, + vm_offset_t address) +{ + struct node *np; + error_t err; + struct iblock_spec indirs[NIADDR + 1]; + daddr_t bno; + struct disknode *dn; + struct dinode *di; + + /* Zero an sblock->fs_bsize piece of disk starting at BNO, + synchronously. 
We do this on newly allocated indirect + blocks before setting the pointer to them to ensure that an + indirect block absolutely never points to garbage. */ + void zero_disk_block (int bno) + { + bzero (indir_block (bno), sblock->fs_bsize); + sync_disk_blocks (bno, sblock->fs_bsize, 1); + }; + + /* Problem--where to get cred values for allocation here? */ + +#if 0 + printf ("Unlock page request, Object %#x\tOffset %#x...", pager, address); + fflush (stdout); +#endif + + if (pager->type == DISK) + return 0; + + np = pager->np; + dn = np->dn; + di = dino (dn->number); + + rwlock_writer_lock (&dn->allocptrlock); + + /* If this is the last block, we don't let it get unlocked. */ + if (address + __vm_page_size + > blkroundup (sblock, np->allocsize) - sblock->fs_bsize) + { + printf ("attempt to unlock at last block denied\n"); + fflush (stdout); + rwlock_writer_unlock (&dn->allocptrlock); + return EIO; + } + + err = fetch_indir_spec (np, lblkno (sblock, address), indirs); + if (err) + { + rwlock_writer_unlock (&dn->allocptrlock); + return EIO; + } + + err = diskfs_catch_exception (); + if (err) + { + rwlock_writer_unlock (&dn->allocptrlock); + return EIO; + } + + /* See if we need a triple indirect block; fail if we do. */ + assert (indirs[0].offset == -1 + || indirs[1].offset == -1 + || indirs[2].offset == -1); + + /* Check to see if this block is allocated. 
*/ + if (indirs[0].bno == 0) + { + size_t wrote; + + if (indirs[0].offset == -1) + { + err = ffs_alloc (np, lblkno (sblock, address), + ffs_blkpref (np, lblkno (sblock, address), + lblkno (sblock, address), di->di_db), + sblock->fs_bsize, &bno, 0); + if (err) + goto out; + + assert (lblkno (sblock, address) < NDADDR); + err = store_write (store, + fsbtodb (sblock, bno) + << log2_dev_blocks_per_dev_bsize, + zeroblock, sblock->fs_bsize, &wrote); + if (!err && wrote != sblock->fs_bsize) + err = EIO; + if (err) + goto out; + + indirs[0].bno = bno; + write_disk_entry (di->di_db[lblkno (sblock, address)], bno); + record_poke (di, sizeof (struct dinode)); + } + else + { + daddr_t *siblock; + + /* We need to set siblock to the single indirect block + array; see if the single indirect block is allocated. */ + if (indirs[1].bno == 0) + { + if (indirs[1].offset == -1) + { + err = ffs_alloc (np, lblkno (sblock, address), + ffs_blkpref (np, lblkno (sblock, address), + INDIR_SINGLE, di->di_ib), + sblock->fs_bsize, &bno, 0); + if (err) + goto out; + zero_disk_block (bno); + indirs[1].bno = bno; + write_disk_entry (di->di_ib[INDIR_SINGLE], bno); + record_poke (di, sizeof (struct dinode)); + } + else + { + daddr_t *diblock; + + /* We need to set diblock to the double indirect + block array; see if the double indirect block is + allocated. */ + if (indirs[2].bno == 0) + { + /* This assert because triple indirection is + not supported. 
*/ + assert (indirs[2].offset == -1); + + err = ffs_alloc (np, lblkno (sblock, address), + ffs_blkpref (np, lblkno (sblock, + address), + INDIR_DOUBLE, di->di_ib), + sblock->fs_bsize, &bno, 0); + if (err) + goto out; + zero_disk_block (bno); + indirs[2].bno = bno; + write_disk_entry (di->di_ib[INDIR_DOUBLE], bno); + record_poke (di, sizeof (struct dinode)); + } + + diblock = indir_block (indirs[2].bno); + mark_indir_dirty (np, indirs[2].bno); + + /* Now we can allocate the single indirect block */ + + err = ffs_alloc (np, lblkno (sblock, address), + ffs_blkpref (np, lblkno (sblock, address), + indirs[1].offset, diblock), + sblock->fs_bsize, &bno, 0); + if (err) + goto out; + zero_disk_block (bno); + indirs[1].bno = bno; + write_disk_entry (diblock[indirs[1].offset], bno); + record_poke (diblock, sblock->fs_bsize); + } + } + + siblock = indir_block (indirs[1].bno); + mark_indir_dirty (np, indirs[1].bno); + + /* Now we can allocate the data block. */ + + err = ffs_alloc (np, lblkno (sblock, address), + ffs_blkpref (np, lblkno (sblock, address), + indirs[0].offset, siblock), + sblock->fs_bsize, &bno, 0); + if (err) + goto out; + + err = store_write (store, + fsbtodb (sblock, bno) + << log2_dev_blocks_per_dev_bsize, + zeroblock, sblock->fs_bsize, &wrote); + if (!err && wrote != sblock->fs_bsize) + err = EIO; + if (err) + goto out; + + indirs[0].bno = bno; + write_disk_entry (siblock[indirs[0].offset], bno); + record_poke (siblock, sblock->fs_bsize); + } + } + + out: + diskfs_end_catch_exception (); + rwlock_writer_unlock (&dn->allocptrlock); + return err; +} + +/* Implement the pager_report_extent callback from the pager library. See + <hurd/pager.h> for the interface description. 
*/ +inline error_t +pager_report_extent (struct user_pager_info *pager, + vm_address_t *offset, + vm_size_t *size) +{ + assert (pager->type == DISK || pager->type == FILE_DATA); + + *offset = 0; + + if (pager->type == DISK) + *size = store->size; + else + *size = pager->np->allocsize; + + return 0; +} + +/* Implement the pager_clear_user_data callback from the pager library. + See <hurd/pager.h> for the interface description. */ +void +pager_clear_user_data (struct user_pager_info *upi) +{ + /* XXX Do the right thing for the disk pager here too. */ + if (upi->type == FILE_DATA) + { + spin_lock (&node2pagelock); + if (upi->np->dn->fileinfo == upi) + upi->np->dn->fileinfo = 0; + spin_unlock (&node2pagelock); + diskfs_nrele_light (upi->np); + } + free (upi); +} + +void +pager_dropweak (struct user_pager_info *upi __attribute__ ((unused))) +{ +} + + + +/* Create the DISK pager. */ +void +create_disk_pager (void) +{ + struct user_pager_info *upi = malloc (sizeof (struct user_pager_info)); + + upi->type = DISK; + upi->np = 0; + pager_bucket = ports_create_bucket (); + diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, store->size, + &disk_image); + upi->p = diskfs_disk_pager; +} + +/* This syncs a single file (NP) to disk. Wait for all I/O to complete + if WAIT is set. NP->lock must be held. */ +void +diskfs_file_update (struct node *np, + int wait) +{ + struct dirty_indir *d, *tmp; + struct user_pager_info *upi; + + spin_lock (&node2pagelock); + upi = np->dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + + if (upi) + { + pager_sync (upi->p, wait); + ports_port_deref (upi->p); + } + + for (d = np->dn->dirty; d; d = tmp) + { + sync_disk_blocks (d->bno, sblock->fs_bsize, wait); + tmp = d->next; + free (d); + } + np->dn->dirty = 0; + + diskfs_node_update (np, wait); +} + +/* Invalidate any pager data associated with NODE. 
*/ +void +flush_node_pager (struct node *node) +{ + struct user_pager_info *upi; + struct disknode *dn = node->dn; + struct dirty_indir *dirty = dn->dirty; + + spin_lock (&node2pagelock); + upi = dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + + if (upi) + { + pager_flush (upi->p, 1); + ports_port_deref (upi->p); + } + + dn->dirty = 0; + + while (dirty) + { + struct dirty_indir *next = dirty->next; + free (dirty); + dirty = next; + } +} + +/* Call this to create a FILE_DATA pager and return a send right. + NP must be locked. PROT is the max protection desired. */ +mach_port_t +diskfs_get_filemap (struct node *np, vm_prot_t prot) +{ + struct user_pager_info *upi; + mach_port_t right; + + assert (S_ISDIR (np->dn_stat.st_mode) + || S_ISREG (np->dn_stat.st_mode) + || (S_ISLNK (np->dn_stat.st_mode) + && (!direct_symlink_extension + || np->dn_stat.st_size >= sblock->fs_maxsymlinklen))); + + spin_lock (&node2pagelock); + do + if (!np->dn->fileinfo) + { + upi = malloc (sizeof (struct user_pager_info)); + upi->type = FILE_DATA; + upi->np = np; + upi->max_prot = prot; + upi->allow_unlocked_pagein = 0; + upi->unlocked_pagein_length = 0; + diskfs_nref_light (np); + upi->p = pager_create (upi, pager_bucket, + MAY_CACHE, MEMORY_OBJECT_COPY_DELAY); + if (upi->p == 0) + { + diskfs_nrele_light (np); + free (upi); + spin_unlock (&node2pagelock); + return MACH_PORT_NULL; + } + np->dn->fileinfo = upi; + right = pager_get_port (np->dn->fileinfo->p); + ports_port_deref (np->dn->fileinfo->p); + } + else + { + np->dn->fileinfo->max_prot |= prot; + + /* Because NP->dn->fileinfo->p is not a real reference, + this might be nearly deallocated. If that's so, then + the port right will be null. In that case, clear here + and loop. The deallocation will complete separately. 
*/ + right = pager_get_port (np->dn->fileinfo->p); + if (right == MACH_PORT_NULL) + np->dn->fileinfo = 0; + } + while (right == MACH_PORT_NULL); + + spin_unlock (&node2pagelock); + + mach_port_insert_right (mach_task_self (), right, right, + MACH_MSG_TYPE_MAKE_SEND); + + return right; +} + +/* Call this when we should turn off caching so that unused memory object + ports get freed. */ +void +drop_pager_softrefs (struct node *np) +{ + struct user_pager_info *upi; + + spin_lock (&node2pagelock); + upi = np->dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + + if (MAY_CACHE && upi) + pager_change_attributes (upi->p, 0, MEMORY_OBJECT_COPY_DELAY, 0); + if (upi) + ports_port_deref (upi->p); +} + +/* Call this when we should turn on caching because it's no longer + important for unused memory object ports to get freed. */ +void +allow_pager_softrefs (struct node *np) +{ + struct user_pager_info *upi; + + spin_lock (&node2pagelock); + upi = np->dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + + if (MAY_CACHE && upi) + pager_change_attributes (upi->p, 1, MEMORY_OBJECT_COPY_DELAY, 0); + if (upi) + ports_port_deref (upi->p); +} + +static void +block_caching () +{ + error_t block_cache (void *arg) + { + struct pager *p = arg; + + pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1); + return 0; + } + + /* Loop through the pagers and turn off caching one by one, + synchronously. That should cause termination of each pager. 
*/ + ports_bucket_iterate (pager_bucket, block_cache); +} + +static void +enable_caching () +{ + error_t enable_cache (void *arg) + { + struct pager *p = arg; + struct user_pager_info *upi = pager_get_upi (p); + + pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0); + + /* It's possible that we didn't have caching on before, because + the user here is the only reference to the underlying node + (actually, that's quite likely inside this particular + routine), and if that node has no links. So dinkle the node + ref counting scheme here, which will cause caching to be + turned off, if that's really necessary. */ + if (upi->type == FILE_DATA) + { + diskfs_nref (upi->np); + diskfs_nrele (upi->np); + } + + return 0; + } + + ports_bucket_iterate (pager_bucket, enable_cache); +} + +/* Tell diskfs if there are pagers exported, and if none, then + prevent any new ones from showing up. */ +int +diskfs_pager_users () +{ + int npagers = ports_count_bucket (pager_bucket); + + if (npagers <= 1) + return 0; + + if (MAY_CACHE) + { + block_caching (); + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + npagers = ports_count_bucket (pager_bucket); + if (npagers <= 1) + return 0; + + /* Darn, there are actual honest users. Turn caching back on, + and return failure. */ + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return 1; +} + +/* Return the bitwise or of the maximum prot parameter (the second arg to + diskfs_get_filemap) for all active user pagers. */ +vm_prot_t +diskfs_max_user_pager_prot () +{ + vm_prot_t max_prot = 0; + int npagers = ports_count_bucket (pager_bucket); + + if (npagers > 1) + /* More than just the disk pager. */ + { + error_t add_pager_max_prot (void *v_p) + { + struct pager *p = v_p; + struct user_pager_info *upi = pager_get_upi (p); + if (upi->type == FILE_DATA) + max_prot |= upi->max_prot; + /* Stop iterating if MAX_PROT is as filled as it's going to get. 
*/ + return max_prot == (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); + } + + block_caching (); /* Make any silly pagers go away. */ + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + ports_bucket_iterate (pager_bucket, add_pager_max_prot); + + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return max_prot; +} + +/* Call this to find out the struct pager * corresponding to the + FILE_DATA pager of inode IP. This should be used *only* as a subsequent + argument to register_memory_fault_area, and will be deleted when + the kernel interface is fixed. NP must be locked. */ +struct pager * +diskfs_get_filemap_pager_struct (struct node *np) +{ + /* This is safe because fileinfo can't be cleared; there must be + an active mapping for this to be called. */ + return np->dn->fileinfo->p; +} + +/* Shutdown all the pagers. */ +void +diskfs_shutdown_pager () +{ + error_t shutdown_one (void *arg) + { + struct pager *p = arg; + /* Don't ever shut down the disk pager. */ + if (p != diskfs_disk_pager) + pager_shutdown (p); + return 0; + } + + copy_sblock (); + write_all_disknodes (); + ports_bucket_iterate (pager_bucket, shutdown_one); + sync_disk (1); +} + +/* Sync all the pagers. */ +void +diskfs_sync_everything (int wait) +{ + error_t sync_one (void *arg) + { + struct pager *p = arg; + /* Make sure the disk pager is done last. */ + if (p != diskfs_disk_pager) + pager_sync (p, wait); + return 0; + } + + copy_sblock (); + write_all_disknodes (); + ports_bucket_iterate (pager_bucket, sync_one); + sync_disk (wait); +} diff --git a/ufs/pokeloc.c b/ufs/pokeloc.c new file mode 100644 index 00000000..267aa106 --- /dev/null +++ b/ufs/pokeloc.c @@ -0,0 +1,85 @@ +/* Remember where we've written the disk to speed up sync + Copyright (C) 1994, 1996 Free Software Foundation, Inc. + Written by Michael I. Bushnell. + + This file is part of the GNU Hurd. 
+ + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include "ufs.h" + +struct pokeloc +{ + vm_offset_t offset; + vm_size_t length; + struct pokeloc *next; +}; + +struct pokeloc *pokelist; +spin_lock_t pokelistlock = SPIN_LOCK_INITIALIZER; + +/* Remember that data here on the disk has been modified. */ +void +record_poke (void *loc, vm_size_t length) +{ + struct pokeloc *pl = malloc (sizeof (struct pokeloc)); + vm_offset_t offset; + + offset = loc - disk_image; + pl->offset = trunc_page (offset); + pl->length = round_page (offset + length) - pl->offset; + + spin_lock (&pokelistlock); + pl->next = pokelist; + pokelist = pl; + spin_unlock (&pokelistlock); +} + +/* Get rid of any outstanding pokes. 
*/ +void +flush_pokes () +{ + struct pokeloc *pl; + + spin_lock (&pokelistlock); + pl = pokelist; + pokelist = 0; + spin_unlock (&pokelistlock); + + while (pl) + { + struct pokeloc *next = pl->next; + free (pl); + pl = next; + } +} + +/* Sync all the modified pieces of disk */ +void +sync_disk (int wait) +{ + struct pokeloc *pl, *tmp; + + spin_lock (&pokelistlock); + for (pl = pokelist; pl; pl = tmp) + { + pager_sync_some (diskfs_disk_pager, pl->offset, pl->length, wait); + tmp = pl->next; + free (pl); + } + pokelist = 0; + spin_unlock (&pokelistlock); +} + diff --git a/ufs/sizes.c b/ufs/sizes.c new file mode 100644 index 00000000..58cbfc98 --- /dev/null +++ b/ufs/sizes.c @@ -0,0 +1,719 @@ +/* File growth and truncation + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999 Free Software Foundation + +This file is part of the GNU Hurd. + +The GNU Hurd is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +The GNU Hurd is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the GNU Hurd; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by Michael I. Bushnell. */ + +#include "ufs.h" +#include <string.h> + +#ifdef DONT_CACHE_MEMORY_OBJECTS +#define MAY_CACHE 0 +#else +#define MAY_CACHE 1 +#endif + +static int indir_release (struct node *np, daddr_t bno, int level); +static void poke_pages (memory_object_t, vm_offset_t, vm_offset_t); + +/* Implement the diskfs_truncate callback; sse <hurd/diskfs.h> for the + interface description. 
*/ +error_t +diskfs_truncate (struct node *np, + off_t length) +{ + int offset; + struct dinode *di = dino (np->dn->number); + volatile int blocksfreed = 0; + error_t err; + int i; + struct iblock_spec indirs[NIADDR + 1]; + volatile daddr_t lbn; + struct user_pager_info *upi; + + if (length >= np->dn_stat.st_size) + return 0; + + diskfs_check_readonly (); + assert (!diskfs_readonly); + + /* First check to see if this is a kludged symlink; if so + this is special. */ + if (direct_symlink_extension && S_ISLNK (np->dn_stat.st_mode) + && np->dn_stat.st_size < sblock->fs_maxsymlinklen) + { + error_t err; + + err = diskfs_catch_exception (); + if (err) + return err; + bzero ((char *)di->di_shortlink + length, np->dn_stat.st_size - length); + record_poke (di, sizeof (struct dinode)); + diskfs_end_catch_exception (); + np->dn_stat.st_size = length; + np->dn_set_ctime = np->dn_set_mtime = 1; + diskfs_node_update (np, 1); + return 0; + } + + /* If the file is not being trucated to a block boundary, + the zero the partial bit in the new last block. */ + offset = blkoff (sblock, length); + if (offset) + { + int bsize; /* size of new last block */ + int savesize = np->allocsize; + + np->allocsize = length; /* temporary */ + bsize = blksize (sblock, np, lblkno (sblock, length)); + np->allocsize = savesize; + diskfs_node_rdwr (np, zeroblock, length, bsize - offset, 1, 0, 0); + diskfs_file_update (np, 1); + } + + /* Now flush all the data past the new size from the kernel. + Also force any delayed copies of this data to take place + immediately. (We are implicitly changing the data to zeros + and doing it without the kernel's immediate knowledge; + accordingl we must help out the kernel thusly.) 
*/ + spin_lock (&node2pagelock); + upi = np->dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + + if (upi) + { + mach_port_t obj; + + pager_change_attributes (upi->p, MAY_CACHE, + MEMORY_OBJECT_COPY_NONE, 1); + obj = diskfs_get_filemap (np, VM_PROT_READ | VM_PROT_WRITE); + if (obj != MACH_PORT_NULL) + { + /* XXX should cope with errors from diskfs_get_filemap */ + poke_pages (obj, round_page (length), round_page (np->allocsize)); + mach_port_deallocate (mach_task_self (), obj); + pager_flush_some (upi->p, round_page (length), + np->allocsize - length, 1); + } + ports_port_deref (upi->p); + } + + rwlock_writer_lock (&np->dn->allocptrlock); + + /* Update the size on disk; fsck will finish freeing blocks if necessary + should we crash. */ + np->dn_stat.st_size = length; + np->dn_set_mtime = 1; + np->dn_set_ctime = 1; + diskfs_node_update (np, 1); + + /* Find out the location information for the last block to + be retained */ + lbn = lblkno (sblock, length - 1); + err = fetch_indir_spec (np, lbn, indirs); + /* err XXX */ + + /* We don't support triple indirs */ + assert (indirs[3].offset == -2); + + err = diskfs_catch_exception (); + /* err XXX */ + + /* BSD carefully finds out how far to clear; it's vastly simpler + to just clear everything after the new last block. */ + + /* Free direct blocks */ + if (indirs[0].offset < 0) + { + /* ...mapped from the inode. */ + for (i = lbn + 1; i < NDADDR; i++) + if (di->di_db[i]) + { + long bsize = blksize (sblock, np, i); + ffs_blkfree (np, read_disk_entry (di->di_db[i]), bsize); + di->di_db[i] = 0; + blocksfreed += btodb (bsize); + } + } + else + { + /* ... 
or mapped from sindir */ + if (indirs[1].bno) + { + daddr_t *sindir = indir_block (indirs[1].bno); + for (i = indirs[0].offset + 1; i < NINDIR (sblock); i++) + if (sindir[i]) + { + ffs_blkfree (np, read_disk_entry (sindir[i]), + sblock->fs_bsize); + sindir[i] = 0; + blocksfreed += btodb (sblock->fs_bsize); + } + record_poke (sindir, sblock->fs_bsize); + } + } + + /* Free single indirect blocks */ + if (indirs[1].offset < 0) + { + /* ...mapped from the inode */ + if (di->di_ib[INDIR_SINGLE] && indirs[1].offset == -2) + { + blocksfreed += indir_release (np, + read_disk_entry (di->di_ib + [INDIR_SINGLE]), + INDIR_SINGLE); + di->di_ib[INDIR_SINGLE] = 0; + } + } + else + { + /* ...or mapped from dindir */ + if (indirs[2].bno) + { + daddr_t *dindir = indir_block (indirs[2].bno); + for (i = indirs[1].offset + 1; i < NINDIR (sblock); i++) + if (dindir[i]) + { + blocksfreed += indir_release (np, + read_disk_entry (dindir[i]), + INDIR_SINGLE); + dindir[i] = 0; + } + record_poke (dindir, sblock->fs_bsize); + } + } + + /* Free double indirect block */ + assert (indirs[2].offset < 0); /* which must be mapped from the inode */ + if (indirs[2].offset == -2) + { + if (di->di_ib[INDIR_DOUBLE]) + { + blocksfreed += indir_release (np, + read_disk_entry (di->di_ib + [INDIR_DOUBLE]), + INDIR_DOUBLE); + di->di_ib[INDIR_DOUBLE] = 0; + } + } + + /* Finally, check to see if the new last direct block is + changing size; if so release any frags necessary. 
*/ + if (lbn >= 0 && lbn < NDADDR && di->di_db[lbn]) + { + long oldspace, newspace; + daddr_t bn; + + bn = read_disk_entry (di->di_db[lbn]); + oldspace = blksize (sblock, np, lbn); + np->allocsize = fragroundup (sblock, length); + newspace = blksize (sblock, np, lbn); + + assert (newspace); + + if (oldspace - newspace) + { + bn += numfrags (sblock, newspace); + ffs_blkfree (np, bn, oldspace - newspace); + blocksfreed += btodb (oldspace - newspace); + } + } + else + { + if (lbn > NDADDR) + np->allocsize = blkroundup (sblock, length); + else + np->allocsize = fragroundup (sblock, length); + } + + record_poke (di, sizeof (struct dinode)); + + np->dn_stat.st_blocks -= blocksfreed; + np->dn_set_ctime = 1; + diskfs_node_update (np, 1); + + rwlock_writer_unlock (&np->dn->allocptrlock); + + /* At this point the last block (as defined by np->allocsize) + might not be allocated. We need to allocate it to maintain + the rule that the last block of a file is always allocated. */ + + if (np->allocsize && indirs[0].bno == 0) + { + /* The strategy is to reduce LBN until we get one that's allocated; + then reduce allocsize accordingly, then call diskfs_grow. */ + + do + err = fetch_indir_spec (np, --lbn, indirs); + /* err XXX */ + while (indirs[0].bno == 0 && lbn >= 0); + + assert ((lbn + 1) * sblock->fs_bsize < np->allocsize); + np->allocsize = (lbn + 1) * sblock->fs_bsize; + + diskfs_grow (np, length, 0); + } + + diskfs_end_catch_exception (); + + /* Now we can permit delayed copies again. */ + spin_lock (&node2pagelock); + upi = np->dn->fileinfo; + if (upi) + ports_port_ref (upi->p); + spin_unlock (&node2pagelock); + if (upi) + { + pager_change_attributes (upi->p, MAY_CACHE, + MEMORY_OBJECT_COPY_DELAY, 0); + ports_port_deref (upi->p); + } + + return err; +} + +/* Free indirect block BNO of level LEVEL; recursing if necessary + to free other indirect blocks. Return the number of disk + blocks freed. 
*/ +static int +indir_release (struct node *np, daddr_t bno, int level) +{ + int count = 0; + daddr_t *addrs; + int i; + struct dirty_indir *d, *prev, *next; + + assert (bno); + + addrs = indir_block (bno); + for (i = 0; i < NINDIR (sblock); i++) + if (addrs[i]) + { + if (level == INDIR_SINGLE) + { + ffs_blkfree (np, read_disk_entry (addrs[i]), sblock->fs_bsize); + count += btodb (sblock->fs_bsize); + } + else + count += indir_release (np, read_disk_entry (addrs[i]), level - 1); + } + + /* Subtlety: this block is no longer necessary; the information + the kernel has cached corresponding to ADDRS is now unimportant. + Consider that if this block is allocated to a file, it will then + be double cached and the kernel might decide to write out + the disk_image version of the block. So we have to flush + the block from the kernel's memory, making sure we do it + synchronously--and BEFORE we attach it to the free list + with ffs_blkfree. */ + pager_flush_some (diskfs_disk_pager, fsaddr (sblock, bno), sblock->fs_bsize, 1); + + /* We should also take this block off the inode's list of + dirty indirect blocks if it's there. */ + prev = 0; + d = np->dn->dirty; + while (d) + { + next = d->next; + if (d->bno == bno) + { + if (prev) + prev->next = next; + else + np->dn->dirty = next; + free (d); + } + else + { + prev = d; + next = d->next; + } + d = next; + } + + /* Free designated block */ + ffs_blkfree (np, bno, sblock->fs_bsize); + count += btodb (sblock->fs_bsize); + + return count; +} + + +/* Offer data at BUF from START of LEN bytes of file NP. */ +void +offer_data (struct node *np, + off_t start, + size_t len, + vm_address_t buf) +{ + vm_address_t addr; + + len = round_page (len); + + assert (start % vm_page_size == 0); + + assert (np->dn->fileinfo); + for (addr = start; addr < start + len; addr += vm_page_size) + pager_offer_page (np->dn->fileinfo->p, 1, 0, addr, buf + (addr - start)); +} + +/* Logical block LBN of node NP has been extended with ffs_realloccg. 
+ It used to be allocated at OLD_PBN and is now at NEW_PBN. The old + size was OLD_SIZE; it is now NEW_SIZE bytes long. Arrange for the data + on disk to be kept consistent, and free the old block if it has moved. + Return one iff we've actually moved data around on disk. */ +int +block_extended (struct node *np, + daddr_t lbn, + daddr_t old_pbn, + daddr_t new_pbn, + size_t old_size, + size_t new_size) +{ + /* Make sure that any pages of this block which just became allocated + don't get paged in from disk. */ + if (round_page (old_size) < round_page (new_size)) + offer_data (np, lbn * sblock->fs_bsize + round_page (old_size), + round_page (new_size) - round_page (old_size), + (vm_address_t)zeroblock); + + if (old_pbn != new_pbn) + { + memory_object_t mapobj; + error_t err; + vm_address_t mapaddr; + volatile int *pokeaddr; + + /* Map in this part of the file */ + mapobj = diskfs_get_filemap (np, VM_PROT_WRITE | VM_PROT_READ); + + /* XXX Should cope with errors from diskfs_get_filemap and back + out the operation here. */ + assert (mapobj); + + err = vm_map (mach_task_self (), &mapaddr, round_page (old_size), 0, 1, + mapobj, lbn * sblock->fs_bsize, 0, + VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE, 0); + assert_perror (err); + + /* Allow these pageins to occur even though we're holding the lock */ + spin_lock (&unlocked_pagein_lock); + np->dn->fileinfo->allow_unlocked_pagein = lbn * sblock->fs_bsize; + np->dn->fileinfo->unlocked_pagein_length = round_page (old_size); + spin_unlock (&unlocked_pagein_lock); + + /* Make sure all waiting pageins see this change. 
*/ + mutex_lock (&np->dn->allocptrlock.master); + condition_broadcast (&np->dn->allocptrlock.wakeup); + mutex_unlock (&np->dn->allocptrlock.master); + + /* Force the pages in core and make sure they are dirty */ + for (pokeaddr = (int *)mapaddr; + pokeaddr < (int *) (mapaddr + round_page (old_size)); + pokeaddr += vm_page_size / sizeof (*pokeaddr)) + *pokeaddr = *pokeaddr; + + /* Turn off the special pagein permission */ + spin_lock (&unlocked_pagein_lock); + np->dn->fileinfo->allow_unlocked_pagein = 0; + np->dn->fileinfo->unlocked_pagein_length = 0; + spin_unlock (&unlocked_pagein_lock); + + /* Undo mapping */ + mach_port_deallocate (mach_task_self (), mapobj); + munmap ((caddr_t) mapaddr, round_page (old_size)); + + /* Now it's OK to free the old block */ + ffs_blkfree (np, old_pbn, old_size); + + /* Tell caller that we've moved data */ + return 1; + } + else + return 0; +} + + +/* Implement the diskfs_grow callback; see <hurd/diskfs.h> for the + interface description. */ +error_t +diskfs_grow (struct node *np, + off_t end, + struct protid *cred) +{ + daddr_t lbn, olbn; + int size, osize; + error_t err; + struct dinode *di = dino (np->dn->number); + mach_port_t pagerpt; + int need_sync = 0; + + /* Zero an sblock->fs_bsize piece of disk starting at BNO, + synchronously. We do this on newly allocated indirect + blocks before setting the pointer to them to ensure that an + indirect block absolutely never points to garbage. */ + void zero_disk_block (int bno) + { + bzero (indir_block (bno), sblock->fs_bsize); + sync_disk_blocks (bno, sblock->fs_bsize, 1); + }; + + /* Check to see if we don't actually have to do anything */ + if (end <= np->allocsize) + return 0; + + diskfs_check_readonly (); + assert (!diskfs_readonly); + + /* This reference will ensure that NP->dn->fileinfo stays allocated. */ + pagerpt = diskfs_get_filemap (np, VM_PROT_WRITE|VM_PROT_READ); + + if (pagerpt == MACH_PORT_NULL) + return errno; + + /* The new last block of the file. 
/* Implement the diskfs_grow callback; see <hurd/diskfs.h> for the
   interface description.

   Make the on-disk allocation of NP reach at least END bytes.  Nothing
   is done when END does not exceed NP->allocsize.  Otherwise:
     - if the old last block is a direct-block fragment and the new last
       block lies beyond it, the fragment is first expanded to a full
       block;
     - the new last block LBN is then either expanded in place (direct
       block that already exists) or freshly allocated, along with any
       single/double indirect block needed to reach it.
   Only the block containing END - 1 is allocated; blocks between the
   old and the new last block are not touched here.  CRED is passed to
   the direct-block allocations; the indirect-path allocations pass 0.
   On success NP->allocsize is advanced; on failure the error from the
   allocator (or fetch_indir_spec) is returned and allocsize is left
   alone.  */
error_t
diskfs_grow (struct node *np,
	     off_t end,
	     struct protid *cred)
{
  daddr_t lbn, olbn;
  int size, osize;
  error_t err;
  struct dinode *di = dino (np->dn->number);
  mach_port_t pagerpt;
  /* Set from block_extended's return value: nonzero means a block was
     relocated, which triggers diskfs_file_update at the end.  */
  int need_sync = 0;

  /* Zero an sblock->fs_bsize piece of disk starting at BNO,
     synchronously.  We do this on newly allocated indirect
     blocks before setting the pointer to them to ensure that an
     indirect block absolutely never points to garbage. */
  void zero_disk_block (int bno)
    {
      bzero (indir_block (bno), sblock->fs_bsize);
      sync_disk_blocks (bno, sblock->fs_bsize, 1);
    };

  /* Check to see if we don't actually have to do anything */
  if (end <= np->allocsize)
    return 0;

  diskfs_check_readonly ();
  assert (!diskfs_readonly);

  /* This reference will ensure that NP->dn->fileinfo stays allocated. */
  pagerpt = diskfs_get_filemap (np, VM_PROT_WRITE|VM_PROT_READ);

  if (pagerpt == MACH_PORT_NULL)
    return errno;

  /* The new last block of the file. */
  lbn = lblkno (sblock, end - 1);

  /* This is the size of that block if it is in the NDADDR array. */
  size = fragroundup (sblock, blkoff (sblock, end));
  if (size == 0)
    size = sblock->fs_bsize;

  /* Everything below reads or rewrites block pointers, so it runs with
     the allocation-pointer lock held for writing until just after
     allocsize is updated.  */
  rwlock_writer_lock (&np->dn->allocptrlock);

  /* The old last block of the file. */
  olbn = lblkno (sblock, np->allocsize - 1);

  /* This is the size of that block if it is in the NDADDR array. */
  osize = fragroundup (sblock, blkoff (sblock, np->allocsize));
  if (osize == 0)
    osize = sblock->fs_bsize;

  /* If this end point is a new block and the file currently
     has a fragment, then expand the fragment to a full block. */
  if (np->allocsize && olbn < NDADDR && olbn < lbn)
    {
      if (osize < sblock->fs_bsize)
	{
	  daddr_t old_pbn, bno;
	  err = ffs_realloccg (np, olbn,
			       ffs_blkpref (np, lbn, lbn, di->di_db),
			       osize, sblock->fs_bsize, &bno, cred);
	  if (err)
	    goto out;

	  old_pbn = read_disk_entry (di->di_db[olbn]);

	  need_sync = block_extended (np, olbn, old_pbn, bno,
				      osize, sblock->fs_bsize);

	  write_disk_entry (di->di_db[olbn], bno);
	  record_poke (di, sizeof (struct dinode));
	  np->dn_set_ctime = 1;
	}
    }

  if (lbn < NDADDR)
    {
      /* The new last block is addressed directly from the dinode.  */
      daddr_t bno, old_pbn = read_disk_entry (di->di_db[lbn]);

      if (old_pbn != 0)
	{
	  /* The last block is already allocated.  Therefore we
	     must be expanding the fragment.  Make sure that's really
	     what we're up to. */
	  assert (size > osize);
	  assert (lbn == olbn);

	  err = ffs_realloccg (np, lbn,
			       ffs_blkpref (np, lbn, lbn, di->di_db),
			       osize, size, &bno, cred);
	  if (err)
	    goto out;

	  need_sync = block_extended (np, lbn, old_pbn, bno, osize, size);

	  write_disk_entry (di->di_db[lbn], bno);
	  record_poke (di, sizeof (struct dinode));
	  np->dn_set_ctime = 1;
	}
      else
	{
	  /* Allocate a new last block. */
	  err = ffs_alloc (np, lbn,
			   ffs_blkpref (np, lbn, lbn, di->di_db),
			   size, &bno, cred);
	  if (err)
	    goto out;


	  /* Hand zero pages to the pager for the new block before
	     publishing its address in the dinode.  */
	  offer_data (np, lbn * sblock->fs_bsize, size,
		      (vm_address_t)zeroblock);
	  write_disk_entry (di->di_db[lbn], bno);
	  record_poke (di, sizeof (struct dinode));
	  np->dn_set_ctime = 1;
	}
    }
  else
    {
      /* The new last block is reached through indirect blocks.
	 indirs[0] describes the data block, indirs[1] the single
	 indirect block and indirs[2] the double indirect block.  */
      struct iblock_spec indirs[NIADDR + 1];
      daddr_t *siblock;
      daddr_t bno;

      /* Count the number of levels of indirection. */
      err = fetch_indir_spec (np, lbn, indirs);
      if (err)
	goto out;

      /* Make sure we didn't miss the NDADDR case
	 above somehow. */
      assert (indirs[0].offset != -1);

      /* See if we need a triple indirect block; fail if so. */
      assert (indirs[1].offset == -1 || indirs[2].offset == -1);

      /* Check to see if this block is allocated.  If it is
	 that's an error. */
      assert (indirs[0].bno == 0);

      /* We need to set SIBLOCK to the single indirect block
	 array; see if the single indirect block is allocated. */
      if (indirs[1].bno == 0)
	{
	  /* Allocate it. */
	  if (indirs[1].offset == -1)
	    {
	      /* The single indirect block hangs directly off the
		 dinode.  */
	      err = ffs_alloc (np, lbn,
			       ffs_blkpref (np, lbn, INDIR_SINGLE, di->di_ib),
			       sblock->fs_bsize, &bno, 0);
	      if (err)
		goto out;
	      zero_disk_block (bno);
	      indirs[1].bno = bno;
	      write_disk_entry (di->di_ib[INDIR_SINGLE], bno);
	      record_poke (di, sizeof (struct dinode));
	    }
	  else
	    {
	      daddr_t *diblock;

	      /* We need to set diblock to the double indirect block
		 array; see if the double indirect block is allocated. */
	      if (indirs[2].bno == 0)
		{
		  /* This assert because triple indirection is not
		     supported. */
		  assert (indirs[2].offset == -1);
		  err = ffs_alloc (np, lbn,
				   ffs_blkpref (np, lbn,
						INDIR_DOUBLE, di->di_ib),
				   sblock->fs_bsize, &bno, 0);
		  if (err)
		    goto out;
		  zero_disk_block (bno);
		  indirs[2].bno = bno;
		  write_disk_entry (di->di_ib[INDIR_DOUBLE], bno);
		  record_poke (di, sizeof (struct dinode));
		}

	      diblock = indir_block (indirs[2].bno);
	      mark_indir_dirty (np, indirs[2].bno);

	      /* Now we can allocate the single indirect block */
	      err = ffs_alloc (np, lbn,
			       ffs_blkpref (np, lbn,
					    indirs[1].offset, diblock),
			       sblock->fs_bsize, &bno, 0);
	      if (err)
		goto out;
	      zero_disk_block (bno);
	      indirs[1].bno = bno;
	      write_disk_entry (diblock[indirs[1].offset], bno);
	      record_poke (diblock, sblock->fs_bsize);
	    }
	}

      siblock = indir_block (indirs[1].bno);
      mark_indir_dirty (np, indirs[1].bno);

      /* Now we can allocate the data block. */
      err = ffs_alloc (np, lbn,
		       ffs_blkpref (np, lbn, indirs[0].offset, siblock),
		       sblock->fs_bsize, &bno, 0);
      if (err)
	goto out;
      offer_data (np, lbn * sblock->fs_bsize, sblock->fs_bsize,
		  (vm_address_t)zeroblock);
      indirs[0].bno = bno;
      write_disk_entry (siblock[indirs[0].offset], bno);
      record_poke (siblock, sblock->fs_bsize);
    }

  /* Common exit: reached both on success and on any allocation
     failure once the pager reference and the lock have been taken.  */
 out:
  mach_port_deallocate (mach_task_self (), pagerpt);
  if (!err)
    {
      /* NOTE(review): this is computed in `int'; if lbn times the block
	 size can exceed INT_MAX for large files this would overflow --
	 confirm the intended maximum file size.  */
      int newallocsize;
      if (lbn < NDADDR)
	newallocsize = lbn * sblock->fs_bsize + size;
      else
	newallocsize = (lbn + 1) * sblock->fs_bsize;
      assert (newallocsize > np->allocsize);
      np->allocsize = newallocsize;
    }

  rwlock_writer_unlock (&np->dn->allocptrlock);

  /* A block was relocated by block_extended, so update the file now
     that the lock has been dropped.  */
  if (need_sync)
    diskfs_file_update (np, 1);

  return err;
}
*/ +static void +poke_pages (memory_object_t obj, + vm_offset_t start, + vm_offset_t end) +{ + vm_address_t addr, poke; + vm_size_t len; + error_t err; + + while (start < end) + { + len = 8 * vm_page_size; + if (len > end - start) + len = end - start; + addr = 0; + err = vm_map (mach_task_self (), &addr, len, 0, 1, obj, start, 0, + VM_PROT_WRITE|VM_PROT_READ, VM_PROT_READ|VM_PROT_WRITE, 0); + if (!err) + { + for (poke = addr; poke < addr + len; poke += vm_page_size) + *(volatile int *)poke = *(volatile int *)poke; + munmap ((caddr_t) addr, len); + } + start += len; + } +} + diff --git a/ufs/subr.c b/ufs/subr.c new file mode 100644 index 00000000..2b356ddc --- /dev/null +++ b/ufs/subr.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
#if 0 /* Not needed in GNU Hurd ufs. */
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * directory "ip".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the directory.
 *
 * This is the original BSD vnode operation, kept only for reference: the
 * surrounding "#if 0" removes it from the build.  It returns 0 on
 * success, or the error reported by bread(), in which case *a_bpp is
 * left NULL.
 */
int
ffs_blkatoff(ap)
	struct vop_blkatoff_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		char **a_res;
		struct buf **a_bpp;
	} */ *ap;
{
	struct inode *ip;
	register struct fs *fs;
	struct buf *bp;
	daddr_t lbn;
	int bsize, error;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	/* Logical block holding the offset, and that block's size. */
	lbn = lblkno(fs, ap->a_offset);
	bsize = blksize(fs, ip, lbn);

	*ap->a_bpp = NULL;
	if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	/* Point the caller at the requested offset inside the buffer. */
	if (ap->a_res)
		*ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset);
	*ap->a_bpp = bp;
	return (0);
}
#endif /* 0 */
+ */ +void +ffs_fragacct(fs, fragmap, fraglist, cnt) + struct fs *fs; + int fragmap; + long fraglist[]; + int cnt; +{ + int inblk; + register int field, subfield; + register int siz, pos; + + inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; + fragmap <<= 1; + for (siz = 1; siz < fs->fs_frag; siz++) { + if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) + continue; + field = around[siz]; + subfield = inside[siz]; + for (pos = siz; pos <= fs->fs_frag; pos++) { + if ((fragmap & field) == subfield) { + fraglist[siz] += cnt; + pos += siz; + field <<= siz; + subfield <<= siz; + } + field <<= 1; + subfield <<= 1; + } + } +} + +#if 0 /* Not needed in GNU Hurd ufs. */ +void +ffs_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + register struct buf *ebp, *ep; + register daddr_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL) || + ep->b_vp == NULLVP) + continue; + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + continue; + if (vp != ip->i_devvp) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %d, end %d\n", + start, last, ep->b_blkno, + ep->b_blkno + btodb(ep->b_bcount) - 1); + panic("Disk buffer overlap"); + } +} +#endif /* 0 */ + +/* + * block operations + * + * check if a block is available + */ +int +ffs_isblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + unsigned char mask; + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0xff); + case 4: + mask = 0x0f << ((h & 0x1) << 2); + return ((cp[h >> 1] & mask) == mask); + case 2: + mask = 0x03 << ((h & 0x3) << 1); + return ((cp[h >> 2] & mask) == mask); + case 1: + mask = 0x01 << (h & 0x7); + return ((cp[h >> 3] & mask) == mask); + default: + 
/* Taken from 4.4 BSD sys/libkern/skpc.c:
   @(#)skpc.c	8.1 (Berkeley) 6/10/93

   Skip over the leading bytes of the SIZE-byte buffer CP0 that are
   equal to the low eight bits of MASK0.  Returns how many bytes remain,
   counting from the first byte that differs; 0 means every byte
   matched.
*/
int
skpc(int mask0, int size, char *cp0)
{
	const unsigned char wanted = mask0;
	unsigned char *cur = (unsigned char *)cp0;
	unsigned char *const limit = &cur[size];

	while (cur < limit && *cur == wanted)
		++cur;
	return (limit - cur);
}

/* Taken from 4.4 BSD sys/libkern/scanc.c:
   @(#)scanc.c	8.1 (Berkeley) 6/10/93

   Walk the SIZE-byte buffer CP until reaching a byte B for which
   TABLE[B] has any of the low eight bits of MASK0 set.  Returns how
   many bytes remain, counting from that byte; 0 means no byte
   qualified.
*/
int
scanc(unsigned int size, unsigned char *cp, unsigned char table[], int mask0)
{
	const unsigned char wanted = mask0;
	unsigned char *const limit = &cp[size];

	while (cp < limit && (table[*cp] & wanted) == 0)
		++cp;
	return (limit - cp);
}
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
/*
 * Bit patterns for identifying fragments in the block map
 * used as ((map & around) == inside)
 *
 * Both arrays are indexed by fragment size; ffs_fragacct() reads them
 * for sizes 1 .. fs_frag - 1 and shifts them along the map as it scans.
 */
int around[9] = {
	0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
};
int inside[9] = {
	0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
};

/*
 * Given a block map bit pattern, the frag tables tell whether a
 * particular size fragment is available.
 *
 * used as:
 * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map]) {
 *	at least one fragment of the indicated size is available
 * }
 *
 * These tables are used by the scanc instruction on the VAX to
 * quickly find an appropriate fragment.
 *
 * Each table has one entry per possible value of a map byte.
 * fragtbl124 is the table shared by filesystems with 1, 2 or 4
 * fragments per block (see fragtbl below).
 */
u_char fragtbl124[256] = {
	0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e,
	0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
	0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e,
	0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae,
	0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
	0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
	0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
	0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
	0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
	0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
	0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce,
	0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a,
};

/* Same lookup for filesystems with 8 fragments per block. */
u_char fragtbl8[256] = {
	0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04,
	0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
	0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
	0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
	0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
	0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
	0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21,
	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
	0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
	0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12,
	0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
	0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c,
	0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c,
	0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80,
};

/*
 * The actual fragtbl array.
 *
 * Indexed by fs_frag (fragments per block).  Only 1, 2, 4 and 8 have a
 * table; every other slot is a null pointer.
 */
u_char *fragtbl[MAXFRAG + 1] = {
	0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8,
};
/* Filesystem-private state attached to each in-core node (reached as
   NP->dn).  */
struct disknode
{
  /* Inode number; dino () turns it into the on-disk dinode.  */
  ino_t number;

  int dir_idx;

  /* For a directory, this array holds the number of directory entries in
     each DIRBLKSIZE piece of the directory. */
  int *dirents;

  /* Links on hash list. */
  struct node *hnext, **hprevp;

  /* Guards the file's block pointers; diskfs_grow holds it for writing
     while it allocates.  */
  struct rwlock allocptrlock;

  /* Head of the list of this file's possibly-dirty indirect blocks
     (see struct dirty_indir).  */
  struct dirty_indir *dirty;

  /* Pager state for the file's contents.  offer_data and
     block_extended require it to be non-null.  */
  struct user_pager_info *fileinfo;
};

/* Identifies a particular block and where it's found
   when interpreting indirect block structure.  */
struct iblock_spec
{
  /* Disk address of block */
  daddr_t bno;

  /* Offset in next block up; -1 if it's in the inode itself. */
  int offset;
};

/* Identifies an indirect block owned by this file which
   might be dirty. */
struct dirty_indir
{
  daddr_t bno;			/* Disk address of block. */
  struct dirty_indir *next;	/* Next list entry, or null at the end. */
};

/* Per-pager bookkeeping.  */
struct user_pager_info
{
  struct node *np;
  enum pager_type
    {
      DISK,
      FILE_DATA,
    } type;
  struct pager *p;		/* The pager handed to pager_offer_page. */
  vm_prot_t max_prot;

  /* File offset and length of a window in which pageins are allowed
     even while the allocation lock is held; block_extended opens it
     before touching relocated pages and resets both to zero afterwards.
     Both fields are changed under unlocked_pagein_lock.  */
  vm_offset_t allow_unlocked_pagein;
  vm_size_t unlocked_pagein_length;
};

#include <hurd/diskfs-pager.h>

/* The physical media.  */
extern struct store *store;
/* What the user specified.  */
extern struct store_parsed *store_parsed;

/* Mapped image of the disk.  */
extern void *disk_image;

extern void *zeroblock;

extern struct fs *sblock;
extern struct csum *csum;

/* NOTE(review): the variables below are defined here rather than
   declared `extern', so every file including this header carries its
   own tentative definition.  That only links when the toolchain merges
   common symbols (e.g. it fails with -fno-common) -- confirm against
   the build flags before changing.  */
int sblock_dirty;
int csum_dirty;

spin_lock_t node2pagelock;

spin_lock_t alloclock;

spin_lock_t gennumberlock;
u_long nextgennumber;

/* Protects allow_unlocked_pagein / unlocked_pagein_length in
   struct user_pager_info.  */
spin_lock_t unlocked_pagein_lock;

/* The compat_mode specifies whether or not we write
   extensions onto the disk. */
enum compat_mode
{
  COMPAT_GNU = 0,
  COMPAT_BSD42 = 1,
  COMPAT_BSD44 = 2,
} compat_mode;

/* If this is set, then this filesystem has two extensions:
   1) directory entries include the type field.
   2) symlink targets might be written directly in the di_db field
      of the dinode. */
int direct_symlink_extension;

/* If this is set, then the disk is byteswapped from native order. */
int swab_disk;
*/ +unsigned log2_dev_blocks_per_dev_bsize; + +/* Handy macros */ +#define DEV_BSIZE 512 +#define NBBY 8 +#define btodb(n) ((n) / DEV_BSIZE) +#define howmany(x,y) (((x)+((y)-1))/(y)) +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define isclr(a, i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) +#define isset(a, i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) +#define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) +#define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<(i)%NBBY)) +#define fsaddr(fs,n) (fsbtodb(fs,n)*DEV_BSIZE) + + +/* Functions for looking inside disk_image */ + +/* Convert an inode number to the dinode on disk. */ +extern inline struct dinode * +dino (ino_t inum) +{ + return (struct dinode *) + (disk_image + + fsaddr (sblock, ino_to_fsba (sblock, inum)) + + ino_to_fsbo (sblock, inum) * sizeof (struct dinode)); +} + +/* Convert a indirect block number to a daddr_t table. */ +extern inline daddr_t * +indir_block (daddr_t bno) +{ + return (daddr_t *) (disk_image + fsaddr (sblock, bno)); +} + +/* Convert a cg number to the cylinder group. 
/* Functions for byte swapping.

   All three work on the unsigned bit pattern of their argument so that
   no intermediate result is ever sign-extended into the other bytes.  */

/* Return ARG with its two bytes exchanged.  */
extern inline short
swab_short (short arg)
{
  unsigned int bits = (unsigned short) arg;

  return (short) (((bits & 0xff) << 8) | (bits >> 8));
}

/* Return the low 32 bits of ARG with their four bytes reversed.  The
   result is the reversed pattern read as a signed 32-bit quantity, so
   it is the same number whether `long' is 32 or 64 bits wide.  */
extern inline long
swab_long (long arg)
{
  unsigned long bits = (unsigned long) arg & 0xffffffffUL;
  unsigned long swapped = (((bits & 0x000000ffUL) << 24)
			   | ((bits & 0x0000ff00UL) << 8)
			   | ((bits & 0x00ff0000UL) >> 8)
			   | ((bits & 0xff000000UL) >> 24));

  /* Sign-extend from bit 31 without assuming the width of `long'.  */
  return (long) ((swapped ^ 0x80000000UL) - 0x80000000UL);
}

/* Return ARG with its eight bytes reversed.  */
extern inline long long
swab_long_long (long long arg)
{
  unsigned long long bits = (unsigned long long) arg;
  /* Each 32-bit half is reversed on its own and then masked, because
     swab_long may hand back a negative (sign-extended) value.  */
  unsigned long long from_low
    = (unsigned long) swab_long ((long) (bits & 0xffffffffULL)) & 0xffffffffUL;
  unsigned long long from_high
    = (unsigned long) swab_long ((long) (bits >> 32)) & 0xffffffffUL;

  return (long long) ((from_low << 32) | from_high);
}

/* Return ENTRY, after byteswapping it if necessary.  Swapping happens
   only when swab_disk is set; entries of 1, 2 and 4 bytes are
   supported and any other size aborts.  */
#define read_disk_entry(entry)						    \
({									    \
   typeof (entry) ret;							    \
   if (!swab_disk || sizeof (entry) == 1)				    \
     ret = (entry);							    \
   else if (sizeof (entry) == 2)					    \
     ret = swab_short (entry);						    \
   else if (sizeof (entry) == 4)					    \
     ret = swab_long (entry);						    \
   else									    \
     abort ();								    \
   ret;									    \
})

/* Execute A = B, but byteswap it along the way if necessary.  The size
   of A selects the swap, with the same size limits as
   read_disk_entry.  */
#define write_disk_entry(a,b)						    \
({									    \
   if (!swab_disk || sizeof (a) == 1)					    \
     ((a) = (b));							    \
   else if (sizeof (a) == 2)						    \
     ((a) = (swab_short (b)));						    \
   else if (sizeof (a) == 4)						    \
     ((a) = (swab_long (b)));						    \
   else									    \
     abort ();								    \
})
(struct node *, daddr_t, struct iblock_spec *); +void mark_indir_dirty (struct node *, daddr_t); + +/* From hyper.c: */ +void get_hypermetadata (void); +void copy_sblock (void); + +/* From inode.c: */ +struct node *ifind (ino_t ino); +void inode_init (void); +void write_all_disknodes (void); + +/* From pager.c: */ +void create_disk_pager (void); +void din_map (struct node *); +void sin_map (struct node *); +void sin_remap (struct node *, int); +void sin_unmap (struct node *); +void din_unmap (struct node *); +void drop_pager_softrefs (struct node *); +void allow_pager_softrefs (struct node *); +void flush_node_pager (struct node *); + +/* From subr.c: */ +void ffs_fragacct (struct fs *, int, long [], int); +int ffs_isblock(struct fs *, u_char *, daddr_t); +void ffs_clrblock(struct fs *, u_char *, daddr_t); +void ffs_setblock (struct fs *, u_char *, daddr_t); +int skpc (int, int, char *); +int scanc (u_int, u_char *, u_char [], int); + +/* From pokeloc.c: */ +void record_poke (void *, vm_size_t); +void sync_disk (int); +void flush_pokes (); |