From 8f48e6fa4324fc242af66ab0d49e467f98656f15 Mon Sep 17 00:00:00 2001 From: Marcus Brinkmann Date: Tue, 3 Dec 2002 20:52:59 +0000 Subject: Initial check-in. --- fatfs/ChangeLog | 113 ++++++ fatfs/Makefile | 29 ++ fatfs/dir.c | 952 ++++++++++++++++++++++++++++++++++++++++++++++++ fatfs/fat.c | 744 ++++++++++++++++++++++++++++++++++++++ fatfs/fat.h | 403 +++++++++++++++++++++ fatfs/fatfs.h | 121 +++++++ fatfs/inode.c | 764 +++++++++++++++++++++++++++++++++++++++ fatfs/main.c | 265 ++++++++++++++ fatfs/pager.c | 1019 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fatfs/virt-inode.c | 235 ++++++++++++ fatfs/virt-inode.h | 69 ++++ 11 files changed, 4714 insertions(+) create mode 100644 fatfs/ChangeLog create mode 100644 fatfs/Makefile create mode 100644 fatfs/dir.c create mode 100644 fatfs/fat.c create mode 100644 fatfs/fat.h create mode 100644 fatfs/fatfs.h create mode 100644 fatfs/inode.c create mode 100644 fatfs/main.c create mode 100644 fatfs/pager.c create mode 100644 fatfs/virt-inode.c create mode 100644 fatfs/virt-inode.h diff --git a/fatfs/ChangeLog b/fatfs/ChangeLog new file mode 100644 index 00000000..0de81963 --- /dev/null +++ b/fatfs/ChangeLog @@ -0,0 +1,113 @@ +2002-10-06 Marcus Brinkmann + + * main.c (diskfs_server_version): Set to HURD_VERSION. + + * dir.c (dirscanblock): Always emulate "." and ".." for the root + dir, even on FAT32. + (diskfs_get_directs): Likewise. New variable DOTDOT. Set EP to + DOT or DOTDOT when approriate. Implement inode number generation + with vi_rlookup. + Submitted by Marco Gerards . + + * main.c (fetch_root): Implement this for FAT32. + Submitted by Marco Gerards . + + * fatfs.h: Renamed prototype from fat_next_cluster to + fat_get_next_cluster. + * fat.c (fat_extend_chain): Replace CLUSTERS_PER_CHAIN with + CLUSTERS_PER_TABLE and LOG2_CLUSTERS_PER_CHAIN with + LOG2_CLUSTERS_PER_TABLE. + (fat_getcluster): Likewise. + (fat_truncate_node): Likewise. + Submitted by Marco Gerards . 
+ + * fat.c (fat_get_freespace): New function. + * fat.h: New prototype for fat_get_freespace. + * inode.c (diskfs_set_statfs): Set ST->f_bfree and ST->f_bavail. + Submitted by Marco Gerards . + + * fatfs.h: New prototypes for fs_uid and fs_gid. + New prototype for refresh_node_stats. + * inode.c (read_node): Set ST->st_uid to fs_uid and ST->st_gid to + fs_gid. + (refresh_node_stats): New function. + * main.c: New variables default_fs_uid, default_fs_gid, fs_uid, fs_gid. + Include + (options): New variable. + (startup_children): Likewise. + (startup_argp): Likewise. + (runtime_children): Likewise. + (runtime_argp): Likewise. + (diskfs_runtime_argp): Likewise. + (parse_opt): New function. + (main): Set default_fs_uid, default_fs_gid, fs_uid and fs_gid. + Pass startup_argp to diskfs_init_main. + Submitted by Marco Gerards . + + * main.c (read_sblock): Move to ... + * fat.c (read_sblock): ... here and rename to fat_read_sblock. New + variable READ. Don't use disk_image but access store directly. + (fat_read_sblock): Remove check for large clusters. + * fat.c: Include + Submitted by Marco Gerards . + + * fatfs.h (struct user_pager_info): Rename DISK in enum pager_type + to FAT. + (disk_image): Rename to fat_image and make extern. + (host_name, mounted_on): Remove cruft. + (sblock, dr_root_node): Make extern. + * pager.c (create_disk_pager): Rename to create_fat_pager. Create + the pager with the type FAT and size of the FAT, not the whole + disk. Use fat_image instead disk_image. + (pager_read_page): Replace DISK with FAT and call + fat_pager_read_page instead disk_pager_read_page. + (pager_write_page): Replace DISK with FAT and call + fat_pager_write_page instead disk_pager_write_page. + (pager_report_extent): Replace DISK with FAT and return new size. + (disk_pager_read_page): Rename to fat_pager_read_page. Adjusted + to restrict to new size. Use memset instead bcopy. dev_end + renamed to fat_end. Add beginning of FAT to page. 
+ (disk_pager_write_page): Rename to fat_pager_write_page. Adjusted + to restrict to new size. dev_end renamed to fat_end. Add + beginning to FAT to PAGE. + (file_pager_read_huge_page): Use memcpy instead bcopy. + (pending_clusters_write): Likewise. + * fat.c (fat_write_next_cluster): Don't add the beginning of FAT + to FAT_ENTRY_OFFSET. Reflect renaming of disk_image to fat_image. + (fat_get_next_cluster): Likewise. + * main.c (main): Call fat_read_sblock, not read_block. Call + create_fat_pager instead create_disk_pager, and do this only after + reading the superblock. + + * main.c (fetch_root): Use memset, not bzero. + (read_sblock): Use memcpy, not bcopy. + * dir.c (diskfs_get_directs): Replace bcopy with memcpy. + +2002-04-15 Marcus Brinkmann + + * dir.c (diskfs_get_directs): Count the special cased DOT and + DOTDOT directories for root directory nodes in FAT12 and FAT16 + file systems. + + * dir.c (dirscanblock): Determine the parent inode correctly. + Submitted by Marco Gerards . + + * inode.c (read_node): Add VK.dir_offset to BUF before doing the + pointer conversion. + (write_node): Likewise. + Submitted by Marco Gerards . + + * inode.c (write_node): Save the modification time as write time. + Submitted by Marco Gerards . + + * fat.h: Add prototype for fat_from_epoch(). + Submitted by Marco Gerards . + + * inode.c (read_node): For the root dir of a FAT12/16 file system, + set allocsize to size of the root directory region. + (read_node): For files, round up to a full cluster multiple. + Reported by Marco Gerards . + +2000-05-05 Marcus Brinkmann + + * Initial release. 
diff --git a/fatfs/Makefile b/fatfs/Makefile new file mode 100644 index 00000000..61b89302 --- /dev/null +++ b/fatfs/Makefile @@ -0,0 +1,29 @@ +# Copyright (C) 1997 Free Software Foundation +# Modified by Marcus Brinkmann, 2000-05-05 +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +dir := fatfs +makemode := server + +target = fatfs +SRCS = inode.c main.c dir.c pager.c fat.c virt-inode.c +LCLHDRS = fat.h fatfs.h virt-inode.h +DIST_FILES = EXTENSIONS + +OBJS = $(SRCS:.c=.o) +HURDLIBS = diskfs iohelp fshelp store pager ports threads ihash shouldbeinlibc + +include ../Makeconf diff --git a/fatfs/dir.c b/fatfs/dir.c new file mode 100644 index 00000000..9ef76c49 --- /dev/null +++ b/fatfs/dir.c @@ -0,0 +1,952 @@ +/* dir.c - Directory management for the FAT filesystem. + Copyright (C) 1997, 1998, 1999, 2002 Free Software Foundation, Inc. + Written by Thomas Bushnell, n/BSG and Marcus Brinkmann. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include +#include +#include +#include "fatfs.h" + +/* The size of a directory block is usually just the cluster size. + However, the root directory of FAT12/16 file systems is stored in + sectors in a special region, so we settle on the greatest common + divisor here. */ +#define DIRBLKSIZ bytes_per_sector +#define LOG2_DIRBLKSIZ log2_bytes_per_sector + +enum slot_status +{ + /* This means we haven't yet found room for a new entry. */ + LOOKING, + + /* This means that the specified entry is free and should be used. */ + TAKE, + + /* This means that the specified entry has enough room at the end + to hold the new entry. */ + SHRINK, + + /* This means that there is enough space in the block, but not in + any one single entry, so they all have to be shifted to make + room. */ + COMPRESS, + + /* This means that the directory will have to be grown to hold the + entry. */ + EXTEND, + + /* For removal and rename, this means that this is the location + of the entry found. */ + HERE_TIS, +}; + +struct dirstat +{ + /* Type of follow-up operation expected. */ + enum lookup_type type; + + /* One of the statuses above. */ + enum slot_status stat; + + /* Mapped address and length of directory. */ + vm_address_t mapbuf; + vm_size_t mapextent; + + /* Index of this directory block. */ + int idx; + + /* For stat COMPRESS, this is the address (inside mapbuf) + of the first direct in the directory block to be compressed. */ + /* For stat HERE_TIS, SHRINK, and TAKE, this is the entry referenced. */ + struct dirrect *entry; + + /* For stat HERE_TIS, type REMOVE, this is the address of the immediately + previous direct in this directory block, or zero if this is the first. 
*/ + struct dirrect *preventry; + + /* For stat COMPRESS, this is the number of bytes needed to be copied + in order to undertake the compression. */ + size_t nbytes; +}; + +const size_t diskfs_dirstat_size = sizeof (struct dirstat); + +/* Initialize DS such that diskfs_drop_dirstat will ignore it. */ +void +diskfs_null_dirstat (struct dirstat *ds) +{ + ds->type = LOOKUP; +} + +/* Forward declaration. */ +static error_t +dirscanblock (vm_address_t blockoff, struct node *dp, int idx, + const char *name, int namelen, enum lookup_type type, + struct dirstat *ds, ino_t *inum); + +/* Return nonzero if the 11 byte directory name DIRNAME (8 bytes of + base name followed by 3 bytes of extension, both blank padded) + matches the user supplied name USERNAME of length UNAMELEN. The + comparison ignores case. Deleted entries and the end-of-directory + marker never match. */ +static int +fatnamematch (const char *dirname, const char *username, size_t unamelen) +{ + /* Work on a private copy of exactly 11 bytes: names in directory + records are not NUL terminated, and the first byte may have to be + patched below. A stack copy also means there is nothing to free + on the many return paths. */ + char dn[11]; + int dpos = 0; + int upos = 0; + int ext = 0; + + memcpy (dn, dirname, 11); + + /* Deleted files. */ + if (dn[0] == FAT_DIR_NAME_DELETED || dn[0] == FAT_DIR_NAME_LAST) + return 0; + if (dn[0] == FAT_DIR_NAME_REPLACE_DELETED) + dn[0] = FAT_DIR_NAME_DELETED; + + /* Special representations for `.' and `..'. */ + if (!memcmp (dn, FAT_DIR_NAME_DOT, 11)) + return unamelen == 1 && username[0] == '.'; + + if (!memcmp (dn, FAT_DIR_NAME_DOTDOT, 11)) + return unamelen == 2 && username[0] == '.' && username[1] == '.'; + + /* 8 base characters, a dot and 3 extension characters at most. */ + if (unamelen > 12) + return 0; + + do + { + /* First check if we have reached the extension without coming + across blanks. 
*/ + if (dpos == 8 && !ext) + { + if (username[upos] == '.') + { + upos++; + ext = 1; + } + else + break; + } + /* Second, skip blanks in base part. */ + if (dn[dpos] == ' ') + { + if (ext) + break; + while (dpos < 8 && dn[++dpos] == ' '); + if (username[upos] == '.') + upos++; + ext = 1; + } + else + { + /* The ctype functions are only defined for values + representable as unsigned char. */ + if (tolower ((unsigned char) dn[dpos]) + == tolower ((unsigned char) username[upos])) + { + dpos++; + upos++; + } + else + break; + } + } while (upos < unamelen && dpos < 11); + /* Trailing blanks are padding, they match nothing in USERNAME. */ + while (dpos < 11 && dn[dpos] == ' ') + dpos++; + return (upos == unamelen && dpos == 11); +} + +/* Implement the diskfs_lookup callback from the diskfs library. See + <hurd/diskfs.h> for the interface specification. 
*/ +error_t +diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, + struct node **npp, struct dirstat *ds, struct protid *cred) +{ + error_t err; + ino_t inum; + int namelen; + int spec_dotdot; + struct node *np = 0; + int retry_dotdot = 0; + vm_prot_t prot = + (type == LOOKUP) ? VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE); + memory_object_t memobj; + vm_address_t buf = 0; + vm_size_t buflen = 0; + /* BLOCKADDR walks through the mapping at BUF, so it has to be able + to hold an address. */ + vm_address_t blockaddr; + int idx, lastidx; + int looped; + + if ((type == REMOVE) || (type == RENAME)) + assert (npp); + + if (npp) + *npp = 0; + + spec_dotdot = type & SPEC_DOTDOT; + type &= ~SPEC_DOTDOT; + + namelen = strlen (name); + + if (namelen > FAT_NAME_MAX) + return ENAMETOOLONG; + + /* We come back here when ".." has to be looked up a second time + after the locks were taken in the proper order. */ + try_again: + if (ds) + { + ds->type = LOOKUP; + ds->mapbuf = 0; + ds->mapextent = 0; + } + if (buf) + { + munmap ((caddr_t) buf, buflen); + buf = 0; + } + if (ds && (type == CREATE || type == RENAME)) + ds->stat = LOOKING; + + /* Map in the directory contents. */ + memobj = diskfs_get_filemap (dp, prot); + + if (memobj == MACH_PORT_NULL) + return errno; + + buf = 0; + /* We allow extra space in case we have to do an EXTEND. */ + buflen = round_page (dp->dn_stat.st_size + DIRBLKSIZ); + err = vm_map (mach_task_self (), + &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0); + mach_port_deallocate (mach_task_self (), memobj); + + inum = 0; + + if (err) + { + /* Nothing was mapped. Leave through the common exit path so + that DS is invalidated and a node reference held from an + earlier pass is dropped. */ + buf = 0; + goto out; + } + + if (!diskfs_check_readonly ()) + dp->dn_set_atime = 1; + + /* Start the lookup at DP->dn->dir_idx. 
*/ + idx = dp->dn->dir_idx; + if (idx << LOG2_DIRBLKSIZ > dp->dn_stat.st_size) + idx = 0; /* just in case */ + blockaddr = buf + (idx << LOG2_DIRBLKSIZ); + looped = (idx == 0); + lastidx = idx; + if (lastidx == 0) + lastidx = dp->dn_stat.st_size >> LOG2_DIRBLKSIZ; + + while (!looped || idx < lastidx) + { + err = dirscanblock (blockaddr, dp, idx, name, namelen, type, ds, &inum); + if (!err) + { + dp->dn->dir_idx = idx; + break; + } + if (err != ENOENT) + /* A real error. The common exit path unmaps the directory, + marks DS as invalid and releases NP if we hold one. */ + goto out; + + blockaddr += DIRBLKSIZ; + idx++; + if (blockaddr - buf >= dp->dn_stat.st_size && !looped) + { + /* We've gotten to the end; start back at the beginning. */ + looped = 1; + blockaddr = buf; + idx = 0; + } + } + + if (!diskfs_check_readonly ()) + dp->dn_set_atime = 1; + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + + /* If err is set here, it's ENOENT, and we don't want to + think about that as an error yet. */ + err = 0; + + if (inum && npp) + { + if (namelen != 2 || name[0] != '.' || name[1] != '.') + { + if (inum == dp->cache_id) + { + np = dp; + diskfs_nref (np); + } + else + { + err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf); + if (err) + goto out; + } + } + + /* We are looking up "..". */ + /* Check to see if this is the root of the filesystem. */ + else if (dp == diskfs_root_node) + { + err = EAGAIN; + goto out; + } + + /* We can't just do diskfs_cached_lookup, because we would then + deadlock. So we do this. Ick. */ + else if (retry_dotdot) + { + /* Check to see that we got the same answer as last time. */ + if (inum != retry_dotdot) + { + /* Drop what we *thought* was .. (but isn't any more) and + try *again*. */ + diskfs_nput (np); + mutex_unlock (&dp->lock); + err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf); + mutex_lock (&dp->lock); + if (err) + goto out; + retry_dotdot = inum; + goto try_again; + } + /* Otherwise, we got it fine and np is already set properly. 
*/ + } + else if (!spec_dotdot) + { + /* Lock them in the proper order, and then + repeat the directory scan to see if this is still + right. */ + mutex_unlock (&dp->lock); + err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf); + mutex_lock (&dp->lock); + if (err) + goto out; + retry_dotdot = inum; + goto try_again; + } + + /* Here below are the spec dotdot cases. */ + else if (type == RENAME || type == REMOVE) + np = ifind (inum); + + else if (type == LOOKUP) + { + diskfs_nput (dp); + err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf); + if (err) + goto out; + } + else + assert (0); + } + + if ((type == CREATE || type == RENAME) && !inum && ds && ds->stat == LOOKING) + { + /* We didn't find any room, so mark ds to extend the dir. */ + ds->type = CREATE; + ds->stat = EXTEND; + ds->idx = dp->dn_stat.st_size >> LOG2_DIRBLKSIZ; + } + + /* Return to the user; if we can't, release the reference + (and lock) we acquired above. */ + out: + /* Deallocate or save the mapping. */ + if ((err && err != ENOENT) + || !ds + || ds->type == LOOKUP) + { + /* BUF is zero if mapping the directory failed. */ + if (buf) + munmap ((caddr_t) buf, buflen); + if (ds) + ds->type = LOOKUP; /* Set to be ignored by drop_dirstat. */ + } + else + { + ds->mapbuf = buf; + ds->mapextent = buflen; + } + + if (np) + { + assert (npp); + if (err) + { + if (!spec_dotdot) + { + /* Normal case. */ + if (np == dp) + diskfs_nrele (np); + else + diskfs_nput (np); + } + else if (type == RENAME || type == REMOVE) + /* We just did ifind to get np; that allocates + no new references, so we don't have anything to do. */ + ; + else if (type == LOOKUP) + /* We did diskfs_cached_lookup. */ + diskfs_nput (np); + } + else + *npp = np; + } + + return err ? : inum ? 0 : ENOENT; +} + +/* Scan block at address BLKADDR (of node DP; block index IDX), for + name NAME of length NAMELEN. Args TYPE, DS are as for + diskfs_lookup. If found, set *INUM to the inode number, else + return ENOENT. 
*/ +static error_t +dirscanblock (vm_address_t blockaddr, struct node *dp, int idx, + const char *name, int namelen, enum lookup_type type, + struct dirstat *ds, ino_t *inum) +{ + int nfree = 0; + int needed = 0; + vm_address_t currentoff, prevoff = 0; + struct dirrect *entry = 0; + size_t nbytes = 0; + int looking = 0; + int countcopies = 0; + int consider_compress = 0; + inode_t inode; + vi_key_t entry_key = vi_zero_key; + + /* FAT lacks the "." and ".." directory record in the root directory, + so we emulate them here. */ + if (idx == 0 && dp == diskfs_root_node + && (fatnamematch (FAT_DIR_NAME_DOT, name, namelen) + || fatnamematch (FAT_DIR_NAME_DOTDOT, name, namelen))) + { + entry_key.dir_inode = diskfs_root_node->cache_id; + currentoff = blockaddr; + } + else + { + if (ds && (ds->stat == LOOKING + || ds->stat == COMPRESS)) + { + looking = 1; + countcopies = 1; + needed = FAT_DIR_RECORDS (namelen); + } + + for (currentoff = blockaddr, prevoff = 0; + currentoff < blockaddr + DIRBLKSIZ; + prevoff = currentoff, currentoff += FAT_DIR_REC_LEN) + { + entry = (struct dirrect *)currentoff; + + if (looking || countcopies) + { + int thisfree; + + /* Count how much free space this entry has in it. */ + if ((char) entry->name[0] == FAT_DIR_NAME_LAST || + (char) entry->name[0] == FAT_DIR_NAME_DELETED) + thisfree = FAT_DIR_REC_LEN; + else + thisfree = 0; + + /* If this isn't at the front of the block, then it will + have to be copied if we do a compression; count the + number of bytes there too. */ + if (countcopies && currentoff != blockaddr) + nbytes += FAT_DIR_REC_LEN; + + if (ds->stat == COMPRESS && nbytes > ds->nbytes) + /* The previously found compress is better than this + one, so don't bother counting any more. 
*/ + countcopies = 0; + + if (thisfree >= needed) + { + ds->type = CREATE; + ds->stat = TAKE; + ds->entry = entry; + ds->idx = idx; + looking = countcopies = 0; + } + else + { + nfree += thisfree; + if (nfree >= needed) + consider_compress = 1; + } + } + + if (entry->attribute & FAT_DIR_ATTR_LABEL) + /* Either the volume label in root dir or a long filename + component. */ + continue; + + if (fatnamematch (entry->name, name, namelen)) + break; + } + + /* The slot search state lives in DS->stat; DS->type holds the + lookup type and must not be compared with slot statuses. + CONSIDER_COMPRESS is only ever set while DS is non-null. */ + if (consider_compress + && (ds->stat == LOOKING + || (ds->stat == COMPRESS && ds->nbytes > nbytes))) + { + ds->type = CREATE; + ds->stat = COMPRESS; + ds->entry = (struct dirrect *) blockaddr; + ds->idx = idx; + ds->nbytes = nbytes; + } + } + + if (currentoff >= blockaddr + DIRBLKSIZ) + { + /* The name is not in this block. */ + + return ENOENT; + } + + /* We have found the required name. */ + + if (ds && type == CREATE) + ds->type = LOOKUP; /* It's invalid now. */ + else if (ds && (type == REMOVE || type == RENAME)) + { + ds->type = type; + ds->stat = HERE_TIS; + ds->entry = entry; + ds->idx = idx; + ds->preventry = (struct dirrect *) prevoff; + } + + if (entry_key.dir_inode) + { + /* The required name is "." or ".." in the root dir. */ + *inum = entry_key.dir_inode; + } + else if ((entry->attribute & FAT_DIR_ATTR_DIR) + && !memcmp (entry->name, FAT_DIR_NAME_DOT, 11)) + { + /* "." and ".." have to be treated special. We don't want their + directory records, but the records of the directories they + point to. 
*/ + + *inum = dp->cache_id; + } + else if ((entry->attribute & FAT_DIR_ATTR_DIR) + && !memcmp (entry->name, FAT_DIR_NAME_DOTDOT, 11)) + { + /* A ".." record with first cluster zero refers to the root + directory. */ + if (entry->first_cluster_low[0] == 0 + && entry->first_cluster_low[1] == 0 + && entry->first_cluster_high[0] == 0 + && entry->first_cluster_high[1] == 0) + { + *inum = diskfs_root_node->cache_id; + } + else + { + struct vi_key vk = vi_key (dp->dn->inode); + *inum = vk.dir_inode; + } + } + else + { + /* An ordinary entry is identified by the directory it lives in + and the byte offset of its record in that directory. */ + entry_key.dir_inode = dp->cache_id; + entry_key.dir_offset = (currentoff - blockaddr) + (idx << LOG2_DIRBLKSIZ); + return vi_rlookup(entry_key, inum, &inode, 1); + } + return 0; +} + +/* Following a lookup call for CREATE, this adds a node to a + directory. DP is the directory to be modified; NAME is the name to + be entered; NP is the node being linked in; DS is the cached + information returned by lookup; CRED describes the user making the + call. This call may only be made if the directory has been held + locked continuously since the preceding lookup call, and only if + that call returned ENOENT. */ +error_t +diskfs_direnter_hard (struct node *dp, const char *name, struct node *np, + struct dirstat *ds, struct protid *cred) +{ + struct dirrect *new; + int namelen = strlen (name); + int needed = FAT_DIR_RECORDS (namelen); + error_t err; + loff_t oldsize = 0; + + assert (ds->type == CREATE); + + assert (!diskfs_readonly); + + dp->dn_set_mtime = 1; + + /* Select a location for the new directory entry. Each branch of + this switch is responsible for setting NEW to point to the + on-disk directory entry being written. */ + + switch (ds->stat) + { + case TAKE: + /* We are supposed to consume this slot. 
*/ + assert ((char)ds->entry->name[0] == FAT_DIR_NAME_LAST + || (char)ds->entry->name[0] == FAT_DIR_NAME_DELETED); + + new = ds->entry; + break; + + case EXTEND: + /* Extend the file. 
*/ + assert (needed <= bytes_per_cluster); + + oldsize = dp->dn_stat.st_size; + while (oldsize + bytes_per_cluster > dp->allocsize) + { + err = diskfs_grow (dp, oldsize + bytes_per_cluster, cred); + if (err) + { + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + return err; + } + } + + new = (struct dirrect *) ((char *) ds->mapbuf + oldsize); + + dp->dn_stat.st_size = oldsize + bytes_per_cluster; + dp->dn_set_ctime = 1; + + break; + + case SHRINK: + case COMPRESS: + default: + assert(0); + + /* COMPRESS will be used later, with long filenames, but shrink + does not make sense on fat, as all entries have fixed + size. */ + } + + /* NEW points to the directory entry being written. Now fill in the + data. */ + + /* Blank pad all 11 bytes of the name field first. */ + memset (new->name, ' ', 11); + memcpy (new->name, name, namelen % 11); /* XXX */ + + /* XXX We need to do much, much more here. */ + /* XXX What about creating . and .. for dirs? */ + + /* Mark the directory inode has having been written. */ + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Following a lookup call for REMOVE, this removes the link from the + directory. DP is the directory being changed and DS is the cached + information returned from lookup. This call is only valid if the + directory has been locked continuously since the call to lookup, and + only if that call succeeded. */ +error_t +diskfs_dirremove_hard (struct node *dp, struct dirstat *ds) +{ + assert (ds->type == REMOVE); + assert (ds->stat == HERE_TIS); + + assert (!diskfs_readonly); + + dp->dn_set_mtime = 1; + + /* Flag the record as deleted; the rest of it is left in place. */ + ds->entry->name[0] = FAT_DIR_NAME_DELETED; + + /* XXX Do something with dirrect? inode? 
*/ + + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Following a lookup call for RENAME, this changes the inode number + on a directory entry. 
DP is the directory being changed; NP is the + new node being linked in; DP is the cached information returned by + lookup. This call is only valid if the directory has been locked + continuously since the call to lookup, and only if that call + succeeded. */ +error_t +diskfs_dirrewrite_hard (struct node *dp, struct node *np, struct dirstat *ds) +{ + assert (ds->type == RENAME); + assert (ds->stat == HERE_TIS); + + assert (!diskfs_readonly); + + /* XXX We have to reimplement rename completely. */ + /* + ds->entry->inode = np->cache_id; + */ + dp->dn_set_mtime = 1; + + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + + diskfs_file_update (dp, 1); + + return 0; +} + +/* Tell if DP is an empty directory (has only "." and ".." entries). + This routine must be called from inside a catch_exception (). */ +int +diskfs_dirempty (struct node *dp, struct protid *cred) +{ + error_t err; + vm_address_t buf = 0, curoff; + struct dirrect *entry; + int hit = 0; /* Found something in the directory. */ + memory_object_t memobj = diskfs_get_filemap (dp, VM_PROT_READ); + + if (memobj == MACH_PORT_NULL) + /* XXX should reflect error properly. */ + return 0; + + err = vm_map (mach_task_self (), &buf, dp->dn_stat.st_size, 0, + 1, memobj, 0, 0, VM_PROT_READ, VM_PROT_READ, 0); + mach_port_deallocate (mach_task_self (), memobj); + assert (!err); + + if (! diskfs_check_readonly ()) + dp->dn_set_atime = 1; + + for (curoff = buf; + !hit && curoff < buf + dp->dn_stat.st_size; + curoff += FAT_DIR_REC_LEN) + { + entry = (struct dirrect *) curoff; + + if (entry->name[0] == FAT_DIR_NAME_LAST) + break; + if (!entry->name[0] == FAT_DIR_NAME_DELETED + && memcmp (entry->name, FAT_DIR_NAME_DOT, 11) + && memcmp (entry->name, FAT_DIR_NAME_DOTDOT, 11)) + hit = 1; + } + + if (! diskfs_check_readonly ()) + dp->dn_set_atime = 1; + if (diskfs_synchronous) + diskfs_node_update (dp, 1); + + munmap ((caddr_t) buf, dp->dn_stat.st_size); + + return !hit; +} + +/* Make DS an invalid dirstat. 
*/ +error_t +diskfs_drop_dirstat (struct node *dp, struct dirstat *ds) +{ + /* A dirstat of type LOOKUP carries no directory mapping. */ + if (ds->type != LOOKUP) + { + assert (ds->mapbuf); + munmap ((caddr_t) ds->mapbuf, ds->mapextent); + ds->type = LOOKUP; + } + return 0; +} + + +/* Implement the diskfs_get_directs callback as described in + <hurd/diskfs.h>. Starting with entry number ENTRY of directory DP, + store up to NENTRIES entries (all of them if NENTRIES is -1) as + struct dirent records in *DATA, allocating a new buffer if the one + passed in is too small. *DATACNT receives the number of bytes + and *AMT the number of entries stored. */ +error_t +diskfs_get_directs (struct node *dp, + int entry, + int nentries, + char **data, + u_int *datacnt, + vm_size_t bufsiz, + int *amt) +{ + volatile vm_size_t allocsize; + struct dirrect *ep; + struct dirent *userp; + int i; + char *datap; + volatile int ouralloc = 0; + error_t err; + vm_prot_t prot = VM_PROT_READ; + memory_object_t memobj; + vm_address_t buf = 0, bufp; + vm_size_t buflen = 0; + + /* Allocate some space to hold the returned data. */ + allocsize = bufsiz ? round_page (bufsiz) : vm_page_size * 4; + if (allocsize > *datacnt) + { + *data = mmap (0, allocsize, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0); + if (*data == MAP_FAILED) + return errno; + ouralloc = 1; + } + + /* Map in the directory contents. */ + memobj = diskfs_get_filemap (dp, prot); + + if (memobj == MACH_PORT_NULL) + { + /* Remember errno before munmap gets a chance to change it. */ + err = errno; + if (ouralloc) + munmap (*data, allocsize); + return err; + } + + /* Map whole pages covering the directory. 
*/ + buflen = round_page (dp->dn_stat.st_size); + err = vm_map (mach_task_self (), + &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0); + mach_port_deallocate (mach_task_self (), memobj); + if (err) + { + if (ouralloc) + munmap (*data, allocsize); + return err; + } + + /* Skip the first ENTRY entries. */ + bufp = buf; + for (i = 0; i < entry; i ++) + { + /* The root directory in FAT file systems doesn't contain + entries for DOT and DOTDOT, they are special cased below. */ + if (dp == diskfs_root_node && i < 2) + continue; + + ep = (struct dirrect *) bufp; + + if (bufp >= buf + buflen || (char)ep->name[0] == FAT_DIR_NAME_LAST) + { + /* Not that many entries in the directory; return nothing. */ + if (ouralloc) + /* Release the buffer itself, not the caller's pointer + variable. */ + munmap (*data, allocsize); + munmap ((caddr_t) buf, buflen); + *datacnt = 0; + *amt = 0; + return 0; + } + + /* Ignore and skip deleted and label entries (catches also long + filenames). 
*/ + if ((char)ep->name[0] == FAT_DIR_NAME_DELETED + || (ep->attribute & FAT_DIR_ATTR_LABEL)) + i--; + bufp = bufp + FAT_DIR_REC_LEN; + } + + /* Now copy entries one at a time. */ + i = 0; + datap = *data; + while (((nentries == -1) || (i < nentries)) + && (!bufsiz || datap - *data < bufsiz) + && bufp < buf + buflen) + { + char name[13]; + size_t namlen, reclen; + struct dirrect dot = { FAT_DIR_NAME_DOT, FAT_DIR_ATTR_DIR }; + struct dirrect dotdot = { FAT_DIR_NAME_DOTDOT, FAT_DIR_ATTR_DIR }; + + /* The root directory in FAT file systems doesn't contain + entries for DOT and DOTDOT, they are special cased below. */ + if (dp == diskfs_root_node && i < 2) + { + if (i == 0) + ep = &dot; + else + ep = &dotdot; + } + else + ep = (struct dirrect *) bufp; + + if ((char)ep->name[0] == FAT_DIR_NAME_LAST) + { + /* Last entry. */ + bufp = buf + buflen; + continue; + } + + if ((char)ep->name[0] == FAT_DIR_NAME_DELETED || (ep->attribute & FAT_DIR_ATTR_LABEL)) + { + bufp = bufp + FAT_DIR_REC_LEN; + continue; + } + + /* See if there's room to hold this one. */ + + fat_to_unix_filename(ep->name, name); + namlen = strlen(name); + + /* Perhaps downcase it? */ + + /* Records are padded to a multiple of four bytes. */ + reclen = sizeof (struct dirent) + namlen; + reclen = (reclen + 3) & ~3; + + /* Expand buffer if necessary. */ + if (datap - *data + reclen > allocsize) + { + vm_address_t newdata; + + vm_allocate (mach_task_self (), &newdata, + (ouralloc + ? (allocsize *= 2) + : (allocsize = vm_page_size * 2)), 1); + memcpy ((void *) newdata, (void *) *data, datap - *data); + + if (ouralloc) + munmap (*data, allocsize / 2); + + datap = (char *) newdata + (datap - *data); + *data = (char *) newdata; + ouralloc = 1; + } + + userp = (struct dirent *) datap; + + /* Fill in entry. 
*/ + /* Report the same inode numbers as a lookup of the name would + yield (see dirscanblock). This also covers the emulated + "." and ".." of the root directory, which have no directory + record of their own. */ + if ((ep->attribute & FAT_DIR_ATTR_DIR) + && !memcmp (ep->name, FAT_DIR_NAME_DOT, 11)) + userp->d_fileno = dp->cache_id; + else if ((ep->attribute & FAT_DIR_ATTR_DIR) + && !memcmp (ep->name, FAT_DIR_NAME_DOTDOT, 11)) + { + if (ep->first_cluster_low[0] == 0 + && ep->first_cluster_low[1] == 0 + && ep->first_cluster_high[0] == 0 + && ep->first_cluster_high[1] == 0) + userp->d_fileno = diskfs_root_node->cache_id; + else + { + struct vi_key vk = vi_key (dp->dn->inode); + userp->d_fileno = vk.dir_inode; + } + } + else + { + ino_t inode; + inode_t v_inode; + vi_key_t entry_key; + + entry_key.dir_inode = dp->cache_id; + entry_key.dir_offset = bufp - buf; + + vi_rlookup (entry_key, &inode, &v_inode, 1); + userp->d_fileno = inode; + } + userp->d_type = DT_UNKNOWN; + userp->d_reclen = reclen; + userp->d_namlen = namlen; + memcpy (userp->d_name, name, namlen); + userp->d_name[namlen] = '\0'; + + /* And move along. */ + datap = datap + reclen; + /* The emulated root entries do not consume a directory record. */ + if (!(dp == diskfs_root_node && i < 2)) + bufp = bufp + FAT_DIR_REC_LEN; + i++; + } + + /* If we didn't use all the pages of a buffer we allocated, free + the excess. */ + if (ouralloc + && round_page (datap - *data) < round_page (allocsize)) + munmap ((caddr_t) round_page (datap), + round_page (allocsize) - round_page (datap - *data)); + + munmap ((caddr_t) buf, buflen); + + /* Return. */ + *amt = i; + *datacnt = datap - *data; + return 0; +} diff --git a/fatfs/fat.c b/fatfs/fat.c new file mode 100644 index 00000000..4d3ba3da --- /dev/null +++ b/fatfs/fat.c @@ -0,0 +1,744 @@ +/* fat.c - Support for FAT filesystems. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Marcus Brinkmann. + + This file is part of the GNU Hurd. 
+ + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fatfs.h" + +/* Unprocessed superblock. */ +struct boot_sector *sblock; + +/* Processed sblock info. */ +fat_t fat_type; +size_t bytes_per_sector; +size_t log2_bytes_per_sector; +size_t sectors_per_cluster; +size_t bytes_per_cluster; +unsigned int log2_bytes_per_cluster; +size_t sectors_per_fat; +size_t total_sectors; +size_t nr_of_root_dir_sectors; +size_t first_root_dir_byte; +size_t first_data_sector; +vm_offset_t first_data_byte; +size_t first_fat_sector; +cluster_t nr_of_clusters; + +/* Hold this lock while converting times using gmtime. */ +spin_lock_t epoch_to_time_lock = SPIN_LOCK_INITIALIZER; + +/* Hold this lock while allocating a new cluster in the FAT. */ +spin_lock_t allocate_free_cluster_lock = SPIN_LOCK_INITIALIZER; + +/* Where to look for the next free cluster. This is meant to avoid + searching through a nearly full file system from the beginning at + every request. It would be better to use the field of the same + name in the fs_info block. 2 is the first data cluster in any + FAT. */ +cluster_t next_free_cluster = 2; + + +/* Read the superblock. */ +void +fat_read_sblock (void) +{ + int read; + + sblock = malloc (sizeof (struct boot_sector)); + store_read (store, 0, sizeof (struct boot_sector), (void **) &sblock, &read); + + if (read_word(sblock->id) != BOOT_SECTOR_ID) + error (1, 0, "Could not find valid superblock"); + + /* Parse some important bits of the superblock. 
*/
+
+  /* Sector size: only the four powers of two below are valid.  */
+  bytes_per_sector = read_word (sblock->bytes_per_sector);
+  switch (bytes_per_sector)
+    {
+    case 512:
+      log2_bytes_per_sector = 9;
+      break;
+
+    case 1024:
+      log2_bytes_per_sector = 10;
+      break;
+
+    case 2048:
+      log2_bytes_per_sector = 11;
+      break;
+
+    case 4096:
+      log2_bytes_per_sector = 12;
+      break;
+
+    default:
+      error (1, 0, "Invalid number of bytes per sector");
+    }
+
+  /* Cluster size in sectors: a power of two between 1 and 128.  */
+  sectors_per_cluster = sblock->sectors_per_cluster;
+  if (sectors_per_cluster == 0 || sectors_per_cluster > 128
+      || (sectors_per_cluster & (sectors_per_cluster - 1)))
+    error (1, 0, "Invalid number of sectors per cluster");
+
+  /* Cluster size in bytes must not exceed 32768.  */
+  bytes_per_cluster = sectors_per_cluster << log2_bytes_per_sector;
+  switch (bytes_per_cluster)
+    {
+    case 512:
+      log2_bytes_per_cluster = 9;
+      break;
+
+    case 1024:
+      log2_bytes_per_cluster = 10;
+      break;
+
+    case 2048:
+      log2_bytes_per_cluster = 11;
+      break;
+
+    case 4096:
+      log2_bytes_per_cluster = 12;
+      break;
+
+    case 8192:
+      log2_bytes_per_cluster = 13;
+      break;
+
+    case 16384:
+      log2_bytes_per_cluster = 14;
+      break;
+
+    case 32768:
+      log2_bytes_per_cluster = 15;
+      break;
+
+    default:
+      error (1, 0, "Invalid number of bytes per cluster");
+    }
+
+  /* total_sectors_16 is a word, but its replacement total_sectors_32
+     is a doubleword (offset 32) and must be read with read_dword;
+     reading it as a word truncated every volume above 65535 sectors.  */
+  total_sectors = read_word (sblock->total_sectors_16)
+    ?: read_dword (sblock->total_sectors_32);
+  /* Multiply in 64 bits so that a large sector count cannot wrap.  */
+  if ((unsigned long long) total_sectors * bytes_per_sector > store->size)
+    error (1, 0, "Store is smaller then implied by metadata");
+  if (total_sectors == 0)
+    error (1, 0, "Number of total sectors is zero");
+
+  if (bytes_per_sector & (store->block_size - 1))
+    error (1, 0, "Block size of filesystem is not a multiple of the block size "
+           "of the store");
+
+  if (read_word (sblock->reserved_sectors) == 0)
+    error (1, 0, "Number of reserved sectors is zero");
+  if (sblock->nr_of_fat_tables == 0)
+    error (1, 0, "Number of FATs is zero");
+
+  /* sectors_per_fat_16 is zero on FAT32; sectors_per_fat_32 is a
+     doubleword (offset 36), so read all four bytes of it.  */
+  sectors_per_fat = read_word (sblock->sectors_per_fat_16)
+    ?: read_dword (sblock->compat.fat32.sectors_per_fat_32);
+  if (sectors_per_fat == 0)
+    error (1, 0, "Number of sectors per fat is zero");
+
+  /* Round the size of the fixed root directory up to whole sectors.
+     Written as (bytes + sector - 1) / sector so that the FAT32 case
+     (nr_of_root_dirents == 0) yields 0 instead of underflowing the
+     unsigned subtraction.  */
+  nr_of_root_dir_sectors
+    = ((read_word (sblock->nr_of_root_dirents) * FAT_DIR_REC_LEN)
+       + bytes_per_sector - 1) / bytes_per_sector;
+  if (nr_of_root_dir_sectors & (sectors_per_cluster - 1))
+    error (1, 0, "Number of root dir sectors is not a multiple of sectors_per_cluster");
+
+  /* Layout: reserved sectors, then all FATs, then the root directory
+     region, then the data region.  */
+  first_root_dir_byte = (read_word (sblock->reserved_sectors)
+                         + (sblock->nr_of_fat_tables * sectors_per_fat))
+    << log2_bytes_per_sector;
+  first_data_sector = (first_root_dir_byte >> log2_bytes_per_sector)
+    + nr_of_root_dir_sectors;
+  first_data_byte = first_data_sector << log2_bytes_per_sector;
+
+  nr_of_clusters = (total_sectors - first_data_sector) / sectors_per_cluster;
+
+  /* The FAT type depends on the cluster count alone.  The *_MAX
+     constants are the largest counts still belonging to that type, so
+     the comparison has to be inclusive.  */
+  if (nr_of_clusters <= FAT12_MAX_NR_OF_CLUSTERS)
+    fat_type = FAT12;
+  else if (nr_of_clusters <= FAT16_MAX_NR_OF_CLUSTERS)
+    fat_type = FAT16;
+  else
+    fat_type = FAT32;
+
+  if (fat_type == FAT32 && read_word (sblock->compat.fat32.fs_version) != 0)
+    error (1, 0, "Incompatible file system version");
+
+  /* Locate the FAT to use.  On FAT32, bit 7 of the extension flags
+     selects a single active FAT whose zero based index is in bits 0-3.  */
+  first_fat_sector = 0;
+  if (fat_type == FAT32
+      && (read_word (sblock->compat.fat32.extension_flags) & (1 << 7)))
+    {
+      first_fat_sector = (read_word (sblock->compat.fat32.extension_flags) & 0x0f);
+      /* The index is zero based, so nr_of_fat_tables itself is already
+         out of range.  */
+      if (first_fat_sector >= sblock->nr_of_fat_tables)
+        error (1, 0, "Active FAT table does not exist");
+      first_fat_sector *= sectors_per_fat;
+    }
+  first_fat_sector += read_word (sblock->reserved_sectors);
+}
+
+
+/* Write NEXT_CLUSTER in the FAT at position CLUSTER.
+   You must call this from inside diskfs_catch_exception.
+   Returns 0 (always succeeds).  */
+error_t
+fat_write_next_cluster(cluster_t cluster, cluster_t next_cluster)
+{
+  loff_t fat_entry_offset;
+  cluster_t data;
+
+  /* First data cluster is cluster 2.  
*/ + assert (cluster >= 2 && cluster < nr_of_clusters + 2); + + switch (fat_type) + { + case FAT12: + if (next_cluster == FAT_BAD_CLUSTER) + next_cluster = FAT12_BAD_CLUSTER; + else if (next_cluster == FAT_EOC) + next_cluster = FAT12_EOC; + + fat_entry_offset = (cluster * 3) / 2; + data = read_word (fat_image + fat_entry_offset); + if (cluster & 1) + data = (data & 0xf) | ((next_cluster & 0xfff) << 4); + else + data = (data & 0xf000) | (next_cluster & 0xfff); + + write_word (fat_image + fat_entry_offset, data); + break; + + case FAT16: + if (next_cluster == FAT_BAD_CLUSTER) + next_cluster = FAT16_BAD_CLUSTER; + else if (next_cluster == FAT_EOC) + next_cluster = FAT16_EOC; + + fat_entry_offset = cluster * 2; + write_word (fat_image + fat_entry_offset, next_cluster); + break; + + case FAT32: + default: /* To silence gcc warning. */ + if (next_cluster == FAT_BAD_CLUSTER) + next_cluster = FAT32_BAD_CLUSTER; + else if (next_cluster == FAT_EOC) + next_cluster = FAT32_EOC; + + fat_entry_offset = cluster * 4; + write_dword (fat_image + fat_entry_offset, next_cluster & 0x0fffffff); + } + + return 0; +} + +/* Read the FAT entry at position CLUSTER into NEXT_CLUSTER. + You must call this from inside diskfs_catch_exception. + Returns 0 (always succeeds). */ +error_t +fat_get_next_cluster(cluster_t cluster, cluster_t *next_cluster) +{ + loff_t fat_entry_offset; + + /* First data cluster is cluster 2. 
*/ + assert (cluster >= 2 && cluster < nr_of_clusters + 2); + + switch (fat_type) + { + case FAT12: + fat_entry_offset = (cluster * 3) / 2; + *next_cluster = read_word (fat_image + fat_entry_offset); + if (cluster & 1) + *next_cluster = *next_cluster >> 4; + else + *next_cluster &= 0xfff; + + if (*next_cluster == FAT12_BAD_CLUSTER) + *next_cluster = FAT_BAD_CLUSTER; + else if (*next_cluster >= FAT12_EOC) + *next_cluster = FAT_EOC; + break; + + case FAT16: + fat_entry_offset = cluster * 2; + *next_cluster = read_word (fat_image + fat_entry_offset); + if (*next_cluster == FAT16_BAD_CLUSTER) + *next_cluster = FAT_BAD_CLUSTER; + else if (*next_cluster >= FAT16_EOC) + *next_cluster = FAT_EOC; + break; + + case FAT32: + default: /* To silence gcc warning. */ + fat_entry_offset = cluster * 4; + *next_cluster = read_dword (fat_image + fat_entry_offset); + *next_cluster &= 0x0fffffff; + if (*next_cluster == FAT32_BAD_CLUSTER) + *next_cluster = FAT_BAD_CLUSTER; + else if (*next_cluster >= FAT32_EOC) + *next_cluster = FAT_EOC; + } + + return 0; +} + +/* Allocate a new cluster, write CONTENT into the FAT at this new + clusters position. At success, 0 is returned and CLUSTER contains + the cluster number allocated. Otherwise, ENOSPC is returned if the + filesystem is full. + You must call this from inside diskfs_catch_exception. */ +error_t +fat_allocate_cluster (cluster_t content, cluster_t *cluster) +{ + error_t err = 0; + cluster_t old_next_free_cluster; + int wrapped = 0; + cluster_t found_cluster = FAT_FREE_CLUSTER; + + assert (content != FAT_FREE_CLUSTER); + + spin_lock (&allocate_free_cluster_lock); + old_next_free_cluster = next_free_cluster; + + /* Loop over all clusters, starting from next_free_cluster and + wrapping if reaching the end of the FAT, until we either find an + unallocated cluster, or we have to give up because all clusters + are allocated. 
*/
+  do
+    {
+      cluster_t next_free_content;
+
+      fat_get_next_cluster (next_free_cluster, &next_free_content);
+
+      if (next_free_content == FAT_FREE_CLUSTER)
+        found_cluster = next_free_cluster;
+
+      /* Advance the search position, wrapping to the first data
+         cluster once the end of the FAT is passed.  */
+      if (++next_free_cluster == nr_of_clusters + 2)
+        {
+          next_free_cluster = 2;
+          wrapped = 1;
+        }
+    }
+  while (found_cluster == FAT_FREE_CLUSTER
+         && !(wrapped && next_free_cluster == old_next_free_cluster));
+
+  if (found_cluster != FAT_FREE_CLUSTER)
+    {
+      *cluster = found_cluster;
+      fat_write_next_cluster(found_cluster, content);
+    }
+  else
+    err = ENOSPC;
+
+  spin_unlock(&allocate_free_cluster_lock);
+  return err;
+}
+
+/* Extend the cluster chain to maximum size or new_last_cluster,
+   whatever is less.  If we reach the end of the file, and CREATE is
+   true, allocate new blocks until there is either no space on the
+   device or new_last_cluster are allocated.  (new_last_cluster: 0 is
+   the first cluster of the file).
+
+   Returns 0 on success, ENOMEM if a new chain table could not be
+   allocated, or the error of fat_allocate_cluster.  The node's
+   chain_extension_lock is taken for the duration of the call and is
+   released on every return path.  */
+error_t
+fat_extend_chain (struct node *node, cluster_t new_last_cluster, int create)
+{
+  error_t err = 0;
+  struct disknode *dn = node->dn;
+  struct cluster_chain *table;
+  int offs;
+  cluster_t left, prev_cluster, cluster;
+
+  /* Append a fresh, empty table after *TABLE (or start the list if
+     *TABLE is null) and make *TABLE point to it.  On ENOMEM, *TABLE
+     and the node's list are left untouched.  */
+  error_t allocate_new_table(struct cluster_chain **table)
+    {
+      struct cluster_chain *fresh;
+
+      fresh = malloc (sizeof (struct cluster_chain));
+      if (!fresh)
+        return ENOMEM;
+      fresh->next = 0;
+      if (*table)
+        dn->last = (*table)->next = fresh;
+      else
+        dn->last = dn->first = fresh;
+      *table = fresh;
+      return 0;
+    }
+
+  spin_lock(&dn->chain_extension_lock);
+
+  /* If we already have what we need, or we have all clusters that are
+     available without allocating new ones, go out.  The lock must be
+     dropped here as well, or the next caller spins forever.  */
+  if (new_last_cluster < dn->length_of_chain
+      || (!create && dn->chain_complete))
+    {
+      spin_unlock(&dn->chain_extension_lock);
+      return 0;
+    }
+
+  left = new_last_cluster + 1 - dn->length_of_chain;
+
+  /* Resume after the last cluster already cached, if there is one.  */
+  table = dn->last;
+  if (table)
+    {
+      offs = (dn->length_of_chain - 1) & (CLUSTERS_PER_TABLE - 1);
+      prev_cluster = table->cluster[offs];
+    }
+  else
+    {
+      /* No table yet: the first increment of OFFS below wraps it to 0
+         and triggers the allocation of the first table.  */
+      offs = CLUSTERS_PER_TABLE - 1;
+      prev_cluster = FAT_FREE_CLUSTER;
+    }
+
+  while (left)
+    {
+      if (dn->chain_complete)
+        {
+          /* The on-disk chain is exhausted: allocate a new cluster and
+             link it behind the previous one.  */
+          err = fat_allocate_cluster(FAT_EOC, &cluster);
+          if (err)
+            break;
+          if (prev_cluster)
+            fat_write_next_cluster(prev_cluster, cluster);
+          else
+            /* XXX: Also write this to dirent structure!  */
+            dn->start_cluster = cluster;
+        }
+      else
+        {
+          /* Follow the chain that already exists on disk.  */
+          if (prev_cluster != FAT_FREE_CLUSTER)
+            err = fat_get_next_cluster(prev_cluster, &cluster);
+          else
+            cluster = dn->start_cluster;
+          if (cluster == FAT_EOC || cluster == FAT_FREE_CLUSTER)
+            {
+              dn->chain_complete = 1;
+              if (create)
+                continue;
+              else
+                break;
+            }
+        }
+      prev_cluster = cluster;
+      offs++;
+      if (offs == CLUSTERS_PER_TABLE)
+        {
+          offs = 0;
+          err = allocate_new_table(&table);
+          if (err)
+            break;
+        }
+      table->cluster[offs] = cluster;
+      dn->length_of_chain++;
+      left--;
+    }
+
+  if (dn->length_of_chain << log2_bytes_per_cluster > node->allocsize)
+    node->allocsize = dn->length_of_chain << log2_bytes_per_cluster;
+
+  spin_unlock(&dn->chain_extension_lock);
+  return err;
+}
+
+/* Returns in DISK_CLUSTER the disk cluster corresponding to cluster
+   CLUSTER in NODE.  If there is no such cluster yet, but CREATE is
+   true, then it is created, otherwise EINVAL is returned.  
*/ +error_t +fat_getcluster (struct node *node, cluster_t cluster, int create, + cluster_t *disk_cluster) +{ + error_t err = 0; + cluster_t chains_to_go = cluster >> LOG2_CLUSTERS_PER_TABLE; + cluster_t offs = cluster & (CLUSTERS_PER_TABLE - 1); + struct cluster_chain *chain; + + if (cluster >= node->dn->length_of_chain) + { + err = fat_extend_chain (node, cluster, create); + if (err) + return err; + if (cluster >= node->dn->length_of_chain) + { + assert (!create); + return EINVAL; + } + } + chain = node->dn->first; + while (chains_to_go--) + { + assert (chain); + chain = chain->next; + } + assert (chain); + *disk_cluster = chain->cluster[offs]; + return 0; +} + +void +fat_truncate_node (struct node *node, cluster_t clusters_to_keep) +{ + struct cluster_chain *next; + cluster_t count; + cluster_t offs; + cluster_t pos; + + /* The root dir of a FAT12/16 fs is of fixed size, while the root + dir of a FAT32 fs must never decease to exist. */ + assert (! (((fat_type == FAT12 || fat_type == FAT16) && node == diskfs_root_node) + || (fat_type == FAT32 && node == diskfs_root_node && clusters_to_keep == 0))); + + /* Expand the cluster chain, because we have to know the complete tail. */ + fat_extend_chain (node, FAT_EOC, 0); + if (clusters_to_keep == node->dn->length_of_chain) + return; + assert (clusters_to_keep < node->dn->length_of_chain); + + /* Truncation happens here. */ + next = node->dn->first; + if (clusters_to_keep == 0) + { + /* Deallocate the complete file. */ + node->dn->start_cluster = 0; + pos = count = offs = 0; + } + else + { + count = (clusters_to_keep - 1) >> LOG2_CLUSTERS_PER_TABLE; + offs = (clusters_to_keep - 1) & (CLUSTERS_PER_TABLE - 1); + while (count-- > 0) + { + assert (next); + next = next->next; + } + assert (next); + fat_write_next_cluster (next->cluster[offs++], FAT_EOC); + pos = clusters_to_keep; + } + + /* Purge dangling clusters. If we die here, scandisk will have to + clean up the remains. 
*/ + while (pos < node->dn->length_of_chain) + { + if (offs == CLUSTERS_PER_TABLE) + { + offs = 0; + next = next->next; + assert(next); + } + fat_write_next_cluster(next->cluster[offs++], 0); + pos++; + } + + /* Free now unused tables. (Could be done in one run with the above.) */ + next = node->dn->first; + if (clusters_to_keep != 0) + { + count = (clusters_to_keep - 1) >> LOG2_CLUSTERS_PER_TABLE; + offs = (clusters_to_keep - 1) & (CLUSTERS_PER_TABLE - 1); + while (count-- > 0) + { + assert (next); + next = next->next; + } + assert (next); + next = next->next; + } + while (next) + { + struct cluster_chain *next_next = next->next; + free (next); + next = next_next; + } +} + + +/* Count the number of free clusters in the FAT. */ +int +fat_get_freespace (void) +{ + int free_clusters = 0; + cluster_t curr_cluster; + cluster_t next_cluster; + error_t err; + + err = diskfs_catch_exception (); + if (!err) + { + /* First cluster is the 3rd entry in the FAT table. */ + for (curr_cluster = 2; curr_cluster < nr_of_clusters + 2; + curr_cluster++) + { + fat_get_next_cluster (curr_cluster, &next_cluster); + if (next_cluster == FAT_FREE_CLUSTER) + free_clusters++; + } + } + diskfs_end_catch_exception (); + + return free_clusters; +} + + +/* FILE must be a buffer with 13 characters. 
*/
+void fat_to_unix_filename(const char *name, char *file)
+{
+  /* NAME is the 11 byte on-disk name: 8 bytes of base name followed by
+     3 bytes of extension, both padded with spaces.  The result written
+     to FILE is "BASE.EXT", or just "BASE" if the extension is empty,
+     and is NUL terminated.  */
+  int npos;
+  int fpos = 0;
+  int ext = 0;
+
+  for (npos = 0; npos < 11; npos++)
+    {
+      if (name[npos] == ' ')
+        {
+          if (ext)
+            {
+              /* Padding inside the extension ends the name.  */
+              break;
+            }
+          else
+            {
+              /* End of the base name: emit the separator and skip the
+                 rest of the padding up to the extension.  */
+              file[fpos] = '.';
+              fpos++;
+              ext = 1;
+              while (npos < 7 && name[npos+1] == ' ') npos++;
+            }
+        }
+      else
+        {
+          file[fpos] = name[npos];
+          fpos++;
+          if (npos == 7)
+            {
+              /* Base name used all 8 bytes; the extension follows.  */
+              file[fpos] = '.';
+              fpos++;
+              ext = 1;
+            }
+        }
+    }
+  /* Drop a trailing separator left behind by an empty extension.  */
+  if (ext && file[fpos-1] == '.')
+    file[fpos-1] = '\0';
+  else
+    file[fpos] = '\0';
+}
+
+/* Convert the unix file name UN of length UL (not necessarily NUL
+   terminated) into the 11 byte, space padded, upper case on-disk form
+   and store it in FN, which is not NUL terminated.  A base name longer
+   than 8 characters is truncated; everything after a second '.' is
+   ignored.  */
+void
+fat_from_unix_filename(char *fn, const char *un, int ul)
+{
+  int fp = 0;
+  int up = 0;
+  int ext = 0;
+
+  while (fp < 11)
+    {
+      if (up == ul)
+        {
+          /* We parsed the complete unix filename.  */
+          while (fp < 11)
+            fn[fp++] = ' ';
+        }
+      else
+        {
+          if (!ext)
+            {
+              if (un[up] == '.')
+                {
+                  /* Pad the base name and step over the separator.
+                     Advance the index, not the pointer, so that the
+                     UP == UL end test stays valid.  */
+                  while (fp < 8)
+                    fn[fp++] = ' ';
+                  ext = 1;
+                  up++;
+                }
+              else if (fp == 8)
+                {
+                  /* Base name is full: discard input up to and
+                     including the next '.'.  */
+                  while (un[up++] != '.' && up < ul);
+                  ext = 1;
+                }
+              else
+                /* Consume the current input character (index UP, not
+                   the length UL).  toupper needs an unsigned char
+                   value.  */
+                fn[fp++] = toupper((unsigned char) un[up++]);
+            }
+          else
+            {
+              if (un[up] == '.')
+                {
+                  /* A second '.' ends the extension.  */
+                  while (fp < 11)
+                    fn[fp++] = ' ';
+                }
+              else
+                fn[fp++] = toupper((unsigned char) un[up++]);
+            }
+        }
+    }
+}
+
+
+/* Return Epoch-based time from a MSDOS time/date pair.  DATE and TIME
+   each point to a little endian 16 bit field; the result is stored in
+   TS with a zero nanosecond part.  The fields are interpreted as UTC
+   (timegm).  */
+void
+fat_to_epoch (char *date, char *time, struct timespec *ts)
+{
+  struct tm tm;
+
+  /* Date format:
+     Bits 0-4: Day of month (1-31).
+     Bits 5-8: Month of year (1-12).
+     Bits 9-15: Count of years from 1980 (0-127).
+
+     Time format:
+     Bits 0-4: 2-second count (0-29).
+     Bits 5-10: Minutes (0-59).
+     Bits 11-15: Hours (0-23).
+  */
+
+  tm.tm_year = (read_word (date) >> 9) + 80;
+  tm.tm_mon = ((read_word (date) & 0x1ff) >> 5) - 1;
+  tm.tm_mday = read_word (date) & 0x1f;
+  tm.tm_hour = (read_word (time) >> 11);
+  tm.tm_min = (read_word (time) & 0x7ff) >> 5;
+  /* The field counts 2-second units, so double it to get seconds.  */
+  tm.tm_sec = (read_word (time) & 0x1f) << 1;
+  tm.tm_isdst = 0;
+
+  ts->tv_sec = timegm (&tm);
+  ts->tv_nsec = 0;
+}
+
+/* Return MSDOS time/date pair from Epoch-based time.  
*/ +void +fat_from_epoch (char *date, char *time, time_t *tp) +{ + struct tm *tm; + + spin_lock(&epoch_to_time_lock); + tm = gmtime (tp); + + /* Date format: + Bits 0-4: Day of month (1-31). + Bits 5-8: Month of year (1-12). + Bits 9-15: Count of years from 1980 (0-127). + + Time format: + Bits 0-4: 2-second count (0-29). + Bits 5-10: Minutes (0-59). + Bits 11-15: Hours (0-23). + */ + + write_word(date, tm->tm_mday | ((tm->tm_mon + 1) << 5) + | ((tm->tm_year - 80) << 9)); + write_word(time, (tm->tm_hour << 11) | (tm->tm_min << 5) + | (tm->tm_sec >> 1)); + spin_unlock(&epoch_to_time_lock); +} diff --git a/fatfs/fat.h b/fatfs/fat.h new file mode 100644 index 00000000..91e5a5cb --- /dev/null +++ b/fatfs/fat.h @@ -0,0 +1,403 @@ +/* fat.h - Support for FAT filesystems interfaces. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Marcus Brinkmann. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#ifndef FAT_H +#define FAT_H + +/* Specification of the FAT12/16/32 filesystem format. */ + +/* Overview + -------- + + Any FAT fs consists of several regions, which follow immediately + after each other. 
+ + Reserved + + The reserved region consists of the boot sector, and with it the + BIOS Paramter Block, which contains all necessary data about the + filesystem like sector size, number of clusters etc. It also + holds the filesystem info block. + + The reserved region of FAT32 filesystems also hold a backup copy + of the root sector at sector 6 (usually), followed by a backup + copy of the filesystem info sector. + + The number of sectors occupied by the reserved region is stored + in the reserved region as well, in the word at offset 14 + (reserved_sectors). + + FAT + + The FAT region contains the File Allocation Table, which is a + linked list of clusters occupied by each file or directory. + There might be multiple FAT tables in the FAT region, for + redundancy. + + The number of FATs is stored in the reserved region, in the byte + at offset 16 (nr_of_fat_tables). The number of sectors per FAT is + stored in the word at offset 22 (sectors_per_fat_16) or, if this + is zero (as it is for FAT32), in the doubleword at offset 36 + (sectors_per_fat_32). + + Root Directory + + In FAT12/16, the root directory entries allocate their own region + and are not accessed through the FAT. + + The size of this region is determined by the word at offset 17 + (nr_of_root_dirents). You have to multiply this with the nr of + bytes per entry, and divide through the number of bytes per + sector, rounding up. On FAT32 filesystems, this region does not + exist, and nr_of_root_dirents is zero. The FAT32 root directory + is accessed through the FAT as any other directory is. + + Data + + The data region occupies the rest of the filesystem and stores + the actual file and directory data. It is seperated in clusters, + which are indexed in the FAT. + + The size of the data region is stored in the word at offset 19 + (total_sectors_16) or, if this is zero, in the doubleword at + offset 32 (total_sectors_32). 
+ + + NOTE that all meta data in a FAT filesystem is stored in little endian + format. + +*/ + +/* The supported FAT types. */ + +enum fat { FAT12, FAT16, FAT32 }; +typedef enum fat fat_t; + +/* The FAT type is determined by the number of clusters in the data + region, and nothing else. The maximal number of clusters for a + FAT12 and FAT16 respectively is defined here. +*/ + +#define FAT12_MAX_NR_OF_CLUSTERS 4084 +#define FAT16_MAX_NR_OF_CLUSTERS 65524 +#define FAT32_MAX_NR_OF_CLUSTERS (FAT32_BAD_CLUSTER - 1) + +struct boot_sector +{ + /* Unused. */ + unsigned char jump_to_boot_code[3]; /* 0, typ. 0xeb 0x?? 0x90 */ + unsigned char oem_name[8]; /* 3, typ. "MSWIN4.1" */ + + /* Sector and Cluster size. + bytes_per_sector is usually 512, but 1024, 2048, 4096 are also allowed. + sectors_per_cluster is one of 1, 2, 4, 8, 16, 32, 64, 128. + Note that bytes per cluster (product of the two) must be <= 32768. */ + unsigned char bytes_per_sector[2]; /* 11 */ + unsigned char sectors_per_cluster; /* 13 */ + + /* Size of the various regions. + reserved_sectors must not be zero and is typically 1 on FAT12/16 + filesystems and 32 on FAT32 filesystems. + nr_of_fat_tables must not be zero and is typically 2. + nr_of_root_dirents must be zero on FAT32 filesystems. + For FAT12/16, the value multiplied with DIR_ENTRY_SIZE (32) + should always be a multiple of bytes_per_sector to retain + compatibility. For FAT16, 512 should be used. + total_sectors_16 contains the complete number of sectors if not zero. + If zero, the number of sectors is stored in total_sectors_32. */ + unsigned char reserved_sectors[2]; /* 14 */ + unsigned char nr_of_fat_tables; /* 16 */ + unsigned char nr_of_root_dirents[2]; /* 17 */ + unsigned char total_sectors_16[2]; /* 19 */ + + /* Media descriptor. + Allowed are values between 0xf0 and 0xff. + 0xf8 is a fixed hardware (disk), 0xf0 denotes a removable media. + Must be the same as the first byte in the FAT (compatibility + with DOS 1.x). 
*/ + unsigned char media_descriptor; /* 21 */ + + /* Size of one FAT. + On FAT32 systems, this value must be zero and sectors_per_fat_32 + used instead. */ + unsigned char sectors_per_fat_16[2]; /* 22 */ + + /* Disk geometry. Unused. */ + unsigned char sectors_per_track[2]; /* 24 */ + unsigned char nr_of_heads[2]; /* 26 */ + unsigned char nr_of_hidden_sectors[4]; /* 28 */ + + /* See total_sectors_16. */ + unsigned char total_sectors_32[4]; /* 32 */ + + /* FAT specific information. + Starting with offset 36, FAT12/16 filesystems differ from FAT32 + filesystems. */ + union + { + struct + { + unsigned char drive; /* 36 */ + unsigned char reserved; /* 37 */ + + /* Boot signature. + Value is 0x29. + Indicates that the following three fields + are present. */ + unsigned char boot_signature; /* 38 */ + + /* Identifier. + serial is an unique identifier for removable media. + label is the filesystem label, which must match the label + stored in the root directory entry which has DIR_ATTR_LABEL + set. If no name is specified, the content is "NO NAME ". + fs_type: One of "FAT12 ", "FAT16 ", "FAT ". + Don't use. */ + unsigned char serial[4]; /* 39 */ + unsigned char label[11]; /* 43 */ + unsigned char fs_type[8]; /* 54 */ + } fat; + struct + { + /* See sectors_per_fat_16. */ + unsigned char sectors_per_fat_32[4]; /* 36 */ + + /* Extension flags. + Bits 0-3: Zero based nr of active FAT. + Bit 7: If 0, all FATs are active and should be kept up to date. + If 1, only the active FAT (see bits 0-3) should be used. + The rest of the bits are reserved. */ + unsigned char extension_flags[2]; /* 40 */ + + /* Filesystem version. + The high byte is the major number, the low byte the minor version. + Don't mount if either version number is higher than known versions. */ + unsigned char fs_version[2]; /* 42 */ + + /* Root cluster. + The cluster where the root directory starts. */ + unsigned char root_cluster[4]; /* 44 */ + + /* Filesystem Info sector. 
+ The setor number of the filesystem info block in the + reserved area. */ + unsigned char fs_info_sector[2]; /* 48 */ + + /* Backup boot sector. + The sector of the backup copy of the boot sector. + Should be 6, so it can be used even if this field is + corrupted. */ + unsigned char backup_boot_sector[2]; /* 50 */ + unsigned char reserved1[12]; /* 52 */ + + /* See fat structure above, with the following exception: + fs_type is "FAT32 ". */ + unsigned char drive_number; /* 64 */ + unsigned char reserved2; /* 65 */ + unsigned char boot_signature; /* 66 */ + unsigned char serial[4]; /* 67 */ + unsigned char label[11]; /* 71 */ + unsigned char fs_type[8]; /* 82 */ + } fat32; + } compat; + unsigned char unused[420]; /* 90 */ + + /* Expected ID at offset 510. + */ +#define BOOT_SECTOR_ID 0xaa55 + + unsigned char id[2]; /* 510 */ +}; + +/* File System Info Block. */ + +#define FAT_FS_INFO_LEAD_SIGNATURE 0x41615252L +#define FAT_FS_INFO_STRUCT_SIGNATURE 0x61417272L +#define FAT_FS_INFO_TRAIL_SIGNAURE 0xaa550000L +#define FAT_FS_NR_OF_FREE_CLUSTERS_UNKNOWN 0xffffffffL +#define FAT_FS_NEXT_FREE_CLUSTER_UNKNOWN 0xffffffffL + +struct fat_fs_info +{ + unsigned char lead_signature[4]; + unsigned char reserved1[480]; + unsigned char struct_signature[4]; + unsigned char nr_of_free_clusters[4]; + unsigned char next_free_cluster[4]; + unsigned char reserved2[12]; + unsigned char trail_signature[4]; +}; + +/* File Allocation Table, special entries. */ + +#define FAT_FREE_CLUSTER 0 + +#define FAT12_BAD_CLUSTER 0x0ff7 +#define FAT16_BAD_CLUSTER 0xfff7 +#define FAT32_BAD_CLUSTER 0x0ffffff7L +#define FAT_BAD_CLUSTER FAT32_BAD_CLUSTER + +#define FAT12_EOC 0x0ff8 +#define FAT16_EOC 0xfff8 +#define FAT32_EOC 0x0ffffff8 +#define FAT_EOC FAT32_EOC + +/* Directories. */ + +#define FAT_DIR_REC_LEN 32 +#define FAT_DIR_RECORDS(x) FAT_DIR_REC_LEN /* Something else for vfat. 
*/
+
+/* Bits of the attribute byte of a directory entry.  */
+#define FAT_DIR_ATTR_RDONLY 0x01
+#define FAT_DIR_ATTR_HIDDEN 0x02
+#define FAT_DIR_ATTR_SYSTEM 0x04
+#define FAT_DIR_ATTR_LABEL 0x08
+#define FAT_DIR_ATTR_DIR 0x10
+#define FAT_DIR_ATTR_ARCHIVE 0x20
+/* A long name (VFAT) entry has the four low attribute bits set.  This
+   must be built from the FAT_DIR_ATTR_* names defined above; there are
+   no unprefixed DIR_ATTR_* macros.  */
+#define FAT_DIR_ATTR_LONGNAME (FAT_DIR_ATTR_RDONLY | FAT_DIR_ATTR_HIDDEN \
+                               | FAT_DIR_ATTR_SYSTEM | FAT_DIR_ATTR_LABEL)
+
+/* Special values of the first byte of the name field.  */
+#define FAT_DIR_NAME_LAST '\x00'
+#define FAT_DIR_NAME_DELETED '\xe5'
+
+/* If the first character is this, replace it with FAT_DIR_NAME_DELETED
+   after checking for it.  */
+#define FAT_DIR_NAME_REPLACE_DELETED '\x05'
+
+/* The names of the "." and ".." entries, space padded to the full 11
+   bytes of the on-disk name field.  */
+#define FAT_DIR_NAME_DOT ".          "
+#define FAT_DIR_NAME_DOTDOT "..         "
+
+/* On-disk layout of one directory entry (FAT_DIR_REC_LEN bytes).
+   Multi-byte fields are byte arrays holding little endian values.  */
+struct dirrect
+{
+  unsigned char name[11];
+  unsigned char attribute;
+  unsigned char reserved;
+  unsigned char creation_time_centiseconds;
+  unsigned char creation_time[2];
+  unsigned char creation_date[2];
+  unsigned char last_access_date[2];
+  unsigned char first_cluster_high[2];
+  unsigned char write_time[2];
+  unsigned char write_date[2];
+  unsigned char first_cluster_low[2];
+  unsigned char file_size[4];
+};
+
+#define FAT_NAME_MAX 12 /* VFAT: 255 */
+
+extern vm_offset_t first_data_byte;
+extern size_t bytes_per_cluster;
+
+/* A cluster number.  */
+typedef unsigned long cluster_t;
+
+/* Cached cluster chains are kept as a linked list of fixed size
+   tables, each holding CLUSTERS_PER_TABLE consecutive chain entries.  */
+#define LOG2_CLUSTERS_PER_TABLE 10
+#define CLUSTERS_PER_TABLE (1 << LOG2_CLUSTERS_PER_TABLE)
+
+struct cluster_chain
+{
+  struct cluster_chain *next;
+  cluster_t cluster[CLUSTERS_PER_TABLE];
+};
+
+/* Prototyping.  */
+void fat_read_sblock (void);
+void fat_to_epoch (char *, char *, struct timespec *);
+void fat_from_epoch (char *, char *, time_t *);
+error_t fat_getcluster (struct node *, cluster_t, int, cluster_t *);
+void fat_truncate_node (struct node *, cluster_t);
+error_t fat_extend_chain (struct node *, cluster_t, int);
+int fat_get_freespace (void);
+
+/* Unprocessed superblock.  */
+extern struct boot_sector *sblock;
+
+/* Processed sblock info.  
*/
+extern fat_t fat_type;
+extern size_t bytes_per_sector;
+extern size_t log2_bytes_per_sector;
+extern size_t sectors_per_cluster;
+extern size_t bytes_per_cluster;
+extern unsigned int log2_bytes_per_cluster;
+extern size_t sectors_per_fat;
+extern size_t total_sectors;
+extern size_t nr_of_root_dir_sectors;
+extern size_t first_root_dir_byte;
+extern size_t first_data_sector;
+extern vm_offset_t first_data_byte;
+extern size_t first_fat_sector;
+extern cluster_t nr_of_clusters;
+
+/* Numeric conversions for these fields.
+
+   All on-disk values are little endian and are not necessarily
+   aligned (FAT12 entries start at odd byte offsets).  The helpers
+   below therefore assemble and split the values byte by byte, which
+   gives the same result on hosts of any byte order and never performs
+   an unaligned or type-punned access.  */
+#include <endian.h>
+
+/* Return the little endian 32 bit value stored at ADDR.  */
+static inline unsigned int
+read_dword (unsigned char *addr)
+{
+  /* Cast before shifting so that the high byte cannot shift into the
+     sign bit of a promoted int.  */
+  return (unsigned int) addr[0]
+    | ((unsigned int) addr[1] << 8)
+    | ((unsigned int) addr[2] << 16)
+    | ((unsigned int) addr[3] << 24);
+}
+
+/* Return the little endian 16 bit value stored at ADDR.  */
+static inline unsigned int
+read_word (unsigned char *addr)
+{
+  return (unsigned int) addr[0] | ((unsigned int) addr[1] << 8);
+}
+
+/* Store the low 32 bits of VALUE at ADDR in little endian order.  */
+static inline void
+write_dword (unsigned char *addr, unsigned int value)
+{
+  addr[0] = value & 0xff;
+  addr[1] = (value >> 8) & 0xff;
+  addr[2] = (value >> 16) & 0xff;
+  addr[3] = (value >> 24) & 0xff;
+}
+
+/* Store the low 16 bits of VALUE at ADDR in little endian order.  */
+static inline void
+write_word (unsigned char *addr, unsigned int value)
+{
+  addr[0] = value & 0xff;
+  addr[1] = (value >> 8) & 0xff;
+}
+
+#endif /* FAT_H */
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
new file mode 100644
index 00000000..f46695ba
--- /dev/null
+++ b/fatfs/fatfs.h
@@ -0,0 +1,121 @@
+/* fatfs.h - Interface for fatfs.
+   Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc.
+ Written by Thomas Bushnell, n/BSG and Marcus Brinkmann. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include +#include +#include +#include +#include + +#include "fat.h" +#include "virt-inode.h" + +/* There is no such thing as an inode in this format, all such information + being recorded in the directory entry. So we report inode numbers as + the start cluster number of the file. When messing around with the + directory entry, hold the DIRENT_LOCK. */ + +struct disknode +{ + cluster_t start_cluster; + + /* Links on hash list. */ + struct node *hnext, **hprevp; + + /* The inode as returned by virtual inode management routines. */ + inode_t inode; + + struct rwlock dirent_lock; + + char *link_target; /* For S_ISLNK. */ + + size_t translen; + char *translator; + + /* Lock to hold while fiddling with this inode's block allocation + info. */ + struct rwlock alloc_lock; + /* Lock to hold while extending this inode's block allocation info. + Hold only if you hold readers alloc_lock, then you don't need to + hold it if you hold writers alloc_lock already. */ + spin_lock_t chain_extension_lock; + struct cluster_chain *first; + struct cluster_chain *last; + cluster_t length_of_chain; + int chain_complete; + + /* This file's pager. */ + struct pager *pager; + + /* Index to start a directory lookup at. 
*/ + int dir_idx; +}; + +struct user_pager_info +{ + struct node *node; + enum pager_type + { + FAT, + FILE_DATA, + } type; + vm_prot_t max_prot; +}; + +/* The physical media. */ +extern struct store *store; + +/* The UID and GID for all files in the filesystem. */ +extern uid_t fs_uid; +extern gid_t fs_gid; + +/* Mapped image of the FAT. */ +extern void *fat_image; + +/* Handy source of zeroes. */ +extern vm_address_t zerocluster; + +extern struct dirrect dr_root_node; + + +#define round_cluster(offs) \ + ((((offs) + bytes_per_cluster - 1) \ + >> log2_bytes_per_cluster) << log2_bytes_per_cluster) + +#define fat_first_cluster_byte(cluster) \ + (first_data_byte + ((cluster - 2) << log2_bytes_per_cluster)) + +void drop_pager_softrefs (struct node *); +void allow_pager_softrefs (struct node *); +void create_fat_pager (void); + +void flush_node_pager (struct node *node); + +void write_all_disknodes (); + +struct node *ifind (ino_t inum); + +error_t fat_get_next_cluster (cluster_t cluster, cluster_t *next_cluster); +void fat_to_unix_filename (const char *, char *); + +error_t diskfs_cached_lookup_in_dirbuf (int cache_id, struct node **npp, + vm_address_t buf); +void refresh_node_stats (void); + diff --git a/fatfs/inode.c b/fatfs/inode.c new file mode 100644 index 00000000..4119551a --- /dev/null +++ b/fatfs/inode.c @@ -0,0 +1,764 @@ +/* inode.c - Inode management routines. + Copyright (C) 1994,95,96,97,98,99, 2000, 2002 Free Software Foundation, Inc. + Modified for fatfs by Marcus Brinkmann + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.  */
+
+#include
+#include "fatfs.h"
+
+/* These flags aren't actually defined by a header file yet, so
+   temporarily disable them if necessary.  */
+#ifndef UF_APPEND
+#define UF_APPEND 0
+#endif
+#ifndef UF_NODUMP
+#define UF_NODUMP 0
+#endif
+#ifndef UF_IMMUTABLE
+#define UF_IMMUTABLE 0
+#endif
+
+#define INOHSZ 512
+#if ((INOHSZ&(INOHSZ-1)) == 0)
+#define INOHASH(ino) ((ino)&(INOHSZ-1))
+#else
+#define INOHASH(ino) (((unsigned)(ino))%INOHSZ)
+#endif
+
+static struct node *nodehash[INOHSZ];
+
+static error_t read_node (struct node *np, vm_address_t buf);
+
+/* Initialize the inode hash table.  */
+void
+inode_init ()
+{
+  int n;
+  for (n = 0; n < INOHSZ; n++)
+    nodehash[n] = 0;
+}
+
+/* Common implementation of diskfs_cached_lookup and
+   diskfs_cached_lookup_in_dirbuf.  Look INUM up in NODEHASH; on a hit,
+   add a reference, lock the node and return it in *NPP.  On a miss,
+   create a new node, enter it into NODEHASH (locked) and fill it in
+   with read_node, passing BUF through as the directory file map (zero
+   means that read_node maps the directory itself).  Returns ENOMEM if
+   the disknode cannot be allocated, or the error from read_node.  */
+static error_t
+cached_lookup_common (ino64_t inum, struct node **npp, vm_address_t buf)
+{
+  error_t err;
+  struct node *np;
+  struct disknode *dn;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+  for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext)
+    if (np->cache_id == inum)
+      {
+        np->references++;
+        spin_unlock (&diskfs_node_refcnt_lock);
+        mutex_lock (&np->lock);
+        *npp = np;
+        return 0;
+      }
+
+  /* Format specific data for the new node.  */
+  dn = malloc (sizeof (struct disknode));
+  if (! dn)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return ENOMEM;
+    }
+  /* Give every field a defined value; malloc does not clear the
+     memory, and diskfs_node_norefs looks at TRANSLATOR.  */
+  dn->start_cluster = 0;
+  dn->link_target = 0;
+  dn->translen = 0;
+  dn->translator = 0;
+  dn->dir_idx = 0;
+  dn->pager = 0;
+  dn->first = 0;
+  dn->last = 0;
+  dn->length_of_chain = 0;
+  dn->chain_complete = 0;
+  dn->chain_extension_lock = SPIN_LOCK_INITIALIZER;
+  rwlock_init (&dn->alloc_lock);
+  rwlock_init (&dn->dirent_lock);
+
+  /* Create the new node.  */
+  np = diskfs_make_node (dn);
+  np->cache_id = inum;
+  np->dn->inode = vi_lookup(inum);
+
+  mutex_lock (&np->lock);
+
+  /* Put NP in NODEHASH.  */
+  dn->hnext = nodehash[INOHASH(inum)];
+  if (dn->hnext)
+    dn->hnext->dn->hprevp = &dn->hnext;
+  dn->hprevp = &nodehash[INOHASH(inum)];
+  nodehash[INOHASH(inum)] = np;
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+
+  /* Get the contents of NP off disk.  */
+  err = read_node (np, buf);
+
+  if (err)
+    return err;
+  else
+    {
+      *npp = np;
+      return 0;
+    }
+}
+
+/* Fetch inode INUM, set *NPP to the node structure; gain one user
+   reference and lock the node.  */
+error_t
+diskfs_cached_lookup (ino64_t inum, struct node **npp)
+{
+  return cached_lookup_common (inum, npp, 0);
+}
+
+/* Fetch inode INUM, set *NPP to the node structure;
+   gain one user reference and lock the node.
+   On the way, use BUF as the directory file map.  */
+error_t
+diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf)
+{
+  return cached_lookup_common (inum, npp, buf);
+}
+
+/* Lookup node INUM (which must have a reference already) and return
+   it without allocating any new references.
*/
+struct node *
+ifind (ino_t inum)
+{
+  struct node *np;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+  for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext)
+    {
+      if (np->cache_id != inum)
+        continue;
+
+      assert (np->references);
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return np;
+    }
+  /* The caller promised that the node exists.  */
+  assert (0);
+}
+
+/* The last reference to a node has gone away; drop it from the hash
+   table and clean all state in the dn structure.  */
+void
+diskfs_node_norefs (struct node *np)
+{
+  struct cluster_chain *last = np->dn->first;
+
+  /* Unlink NP from its hash chain.  */
+  *np->dn->hprevp = np->dn->hnext;
+  if (np->dn->hnext)
+    np->dn->hnext->dn->hprevp = np->dn->hprevp;
+
+  /* Free the cached cluster chain.  */
+  while (last)
+    {
+      struct cluster_chain *next = last->next;
+      free(last);
+      last = next;
+    }
+
+  if (np->dn->translator)
+    free (np->dn->translator);
+
+  assert (!np->dn->pager);
+
+  free (np->dn);
+  free (np);
+}
+
+/* The last hard reference to a node has gone away; arrange to have
+   all the weak references dropped that can be.  */
+void
+diskfs_try_dropping_softrefs (struct node *np)
+{
+  drop_pager_softrefs (np);
+}
+
+/* The last hard reference to a node has gone away.  */
+void
+diskfs_lost_hardrefs (struct node *np)
+{
+}
+
+/* A new hard reference to a node has been created; it's now OK to
+   have unused weak references.  */
+void
+diskfs_new_hardrefs (struct node *np)
+{
+  allow_pager_softrefs (np);
+}
+
+/* Read stat information out of the directory entry.  NP is the node to
+   fill in.  BUF, if nonzero, is the address of the mapped contents of
+   the directory holding NP's entry; if zero, the directory is looked
+   up and mapped here and unmapped again before returning.  Returns 0
+   on success or an error code; no lock or mapping taken here is left
+   held on return.  */
+static error_t
+read_node (struct node *np, vm_address_t buf)
+{
+  /* XXX This needs careful investigation */
+  error_t err = 0;
+  struct stat *st = &np->dn_stat;
+  struct disknode *dn = np->dn;
+  struct dirrect *dr;
+  struct node *dp = 0;
+  struct vi_key vk = vi_key(np->dn->inode);
+  vm_prot_t prot = VM_PROT_READ;
+  memory_object_t memobj;
+  vm_size_t buflen = 0;
+  int our_buf = 0;
+
+  if (vk.dir_inode == 0)
+    dr = &dr_root_node;
+  else
+    {
+      if (buf == 0)
+        {
+          err = diskfs_cached_lookup (vk.dir_inode, &dp);
+          if (err)
+            return err;
+
+          /* Map in the directory contents.  */
+          memobj = diskfs_get_filemap (dp, prot);
+
+          if (memobj == MACH_PORT_NULL)
+            {
+              /* Save errno before diskfs_nput gets a chance to
+                 change it.  */
+              err = errno;
+              diskfs_nput (dp);
+              return err;
+            }
+
+          buflen = round_page (dp->dn_stat.st_size);
+          err = vm_map (mach_task_self (),
+                        &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+          mach_port_deallocate (mach_task_self (), memobj);
+          if (err)
+            {
+              /* Nothing was mapped, so BUF must not be used.  */
+              diskfs_nput (dp);
+              return err;
+            }
+          our_buf = 1;
+        }
+
+      dr = (struct dirrect *) (buf + vk.dir_offset);
+    }
+
+  st->st_fstype = FSTYPE_MSLOSS;
+  st->st_fsid = getpid ();
+  st->st_ino = np->cache_id;
+  st->st_gen = 0;
+  st->st_rdev = 0;
+
+  st->st_nlink = 1;
+  st->st_uid = fs_uid;
+  st->st_gid = fs_gid;
+
+  st->st_rdev = 0;
+
+  np->dn->translator = 0;
+  np->dn->translen = 0;
+
+  st->st_flags = 0;
+
+  /* If we are called for a newly allocated node that has no directory
+     entry yet, only set a minimal amount of data until the dirent is
+     created (and we get called a second time?).  */
+  /* We will avoid this by overriding the relevant functions.
+  if (dr == (void *)1)
+    return 0;
+  */
+
+  rwlock_reader_lock(&np->dn->dirent_lock);
+
+  dn->start_cluster = (read_word (dr->first_cluster_high) << 16)
+    + read_word (dr->first_cluster_low);
+
+  if (dr->attribute & FAT_DIR_ATTR_DIR)
+    {
+      st->st_mode = S_IFDIR | 0777;
+      /* When we read in the node the first time, diskfs_root_node is
+         zero.  */
+      if (diskfs_root_node == 0 ||
+          (np == diskfs_root_node && (fat_type == FAT12 || fat_type == FAT16)))
+        {
+          st->st_size = read_dword (dr->file_size);
+          np->allocsize = nr_of_root_dir_sectors << log2_bytes_per_sector;
+        }
+      else
+        {
+          /* The directory entry of a directory carries no size, so
+             walk the whole cluster chain to find it.  */
+          np->allocsize = 0;
+          rwlock_reader_lock(&dn->alloc_lock);
+          err = fat_extend_chain (np, FAT_EOC, 0);
+          rwlock_reader_unlock(&dn->alloc_lock);
+          if (err)
+            goto out;
+          st->st_size = np->allocsize;
+        }
+    }
+  else
+    {
+      unsigned offset;
+      st->st_mode = S_IFREG | 0666;
+      st->st_size = read_dword (dr->file_size);
+      np->allocsize = np->dn_stat.st_size;
+
+      /* Round up to a cluster multiple.  */
+      offset = np->allocsize & (bytes_per_cluster - 1);
+      if (offset > 0)
+        np->allocsize += bytes_per_cluster - offset;
+    }
+  if (dr->attribute & FAT_DIR_ATTR_RDONLY)
+    st->st_mode &= ~0222;
+
+  {
+    struct timespec ts;
+    fat_to_epoch (dr->write_date, dr->write_time, &ts);
+    st->st_ctime = st->st_mtime = st->st_atime = ts.tv_sec;
+    /* Convert nanoseconds to microseconds.  */
+    st->st_ctime_usec = st->st_mtime_usec = st->st_atime_usec
+      = ts.tv_nsec / 1000;
+  }
+
+  st->st_blksize = bytes_per_sector;
+  st->st_blocks = (st->st_size - 1) / bytes_per_sector + 1;
+
+ out:
+  /* Common exit: drop the dirent lock and everything we acquired for
+     the directory, on success and on failure alike.  */
+  rwlock_reader_unlock(&np->dn->dirent_lock);
+
+  if (our_buf && buf)
+    munmap ((caddr_t) buf, buflen);
+  if (dp)
+    diskfs_nput (dp);
+  return err;
+}
+
+/* Return 0 if NP's owner can be changed to UID; otherwise return an
+   error code.  */
+error_t
+diskfs_validate_owner_change (struct node *np, uid_t uid)
+{
+  /* Allow configurable uid.  */
+  /* NOTE(review): only uid 0 is accepted here, not fs_uid -- confirm
+     that this is intended.  */
+  if (uid != 0)
+    return EINVAL;
+  return 0;
+}
+
+/* Return 0 if NP's group can be changed to GID; otherwise return an
+   error code.  */
+error_t
+diskfs_validate_group_change (struct node *np, gid_t gid)
+{
+  /* Allow configurable gid.  */
+  /* NOTE(review): only gid 0 is accepted here, not fs_gid -- confirm
+     that this is intended.  */
+  if (gid != 0)
+    return EINVAL;
+  return 0;
+}
+
+/* Return 0 if NP's mode can be changed to MODE; otherwise return an
+   error code.  It must always be possible to clear the mode; diskfs
+   will not ask for permission before doing so.  */
+error_t
+diskfs_validate_mode_change (struct node *np, mode_t mode)
+{
+  /* XXX */
+  return 0;
+}
+
+/* Return 0 if NP's author can be changed to AUTHOR; otherwise return
+   an error code.  */
+error_t
+diskfs_validate_author_change (struct node *np, uid_t author)
+{
+  return (author == np->dn_stat.st_uid) ? 0 : EINVAL;
+}
+
+/* The user may define this function.  Return 0 if NP's flags can be
+   changed to FLAGS; otherwise return an error code.  It must always
+   be possible to clear the flags.
*/
+error_t
+diskfs_validate_flags_change (struct node *np, int flags)
+{
+  if (flags & ~(UF_NODUMP | UF_IMMUTABLE | UF_APPEND))
+    return EINVAL;
+  else
+    return 0;
+}
+
+/* Writes everything from NP's inode to the disk image: the start
+   cluster, the file size and the modification time are stored into
+   NP's directory entry.  Does nothing unless NP->dn_stat_dirty is set.
+   Failures to reach the directory are silently ignored, as the
+   function has no way to report them.  */
+void
+write_node (struct node *np)
+{
+  error_t err;
+  struct stat *st = &np->dn_stat;
+  struct dirrect *dr;
+  struct node *dp;
+  struct vi_key vk = vi_key(np->dn->inode);
+  vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE;
+  memory_object_t memobj;
+  vm_address_t buf = 0;
+  vm_size_t buflen;
+
+  /* XXX: If we are called from node-create before direnter was
+     called, DR is zero and we can't update the node.  Just return
+     here, and leave it to direnter to call us again when we are
+     ready.
+     If we are called for the root directory node, we can't do anything,
+     as FAT root dirs don't have a directory entry for themselve.
+  */
+  if (vk.dir_inode == 0 || np == diskfs_root_node)
+    return;
+
+  assert (!np->dn_set_ctime && !np->dn_set_atime && !np->dn_set_mtime);
+  if (np->dn_stat_dirty)
+    {
+      assert (!diskfs_readonly);
+
+      err = diskfs_cached_lookup (vk.dir_inode, &dp);
+      if (err)
+        return;
+
+      /* Map in the directory contents.  */
+      memobj = diskfs_get_filemap (dp, prot);
+
+      if (memobj == MACH_PORT_NULL)
+        {
+          /* Release the locked directory node we just looked up.  */
+          diskfs_nput (dp);
+          return;
+        }
+
+      buflen = round_page (dp->dn_stat.st_size);
+      err = vm_map (mach_task_self (),
+                    &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+      mach_port_deallocate (mach_task_self (), memobj);
+      if (err)
+        {
+          /* Nothing was mapped; writing through BUF would fault.  */
+          diskfs_nput (dp);
+          return;
+        }
+
+      dr = (struct dirrect *) (buf + vk.dir_offset);
+
+      rwlock_writer_lock(&np->dn->dirent_lock);
+      write_word (dr->first_cluster_low, np->dn->start_cluster & 0xffff);
+      write_word (dr->first_cluster_high, np->dn->start_cluster >> 16);
+
+      write_dword (dr->file_size, st->st_size);
+
+      /* Write time.  */
+      fat_from_epoch ((unsigned char *) &dr->write_date,
+                      (unsigned char *) &dr->write_time, &st->st_mtime);
+
+      rwlock_writer_unlock(&np->dn->dirent_lock);
+      np->dn_stat_dirty = 0;
+
+      munmap ((caddr_t) buf, buflen);
+      diskfs_nput (dp);
+    }
+}
+
+/* Reload all data specific to NODE from disk, without writing anything.
+   Always called with DISKFS_READONLY true.  Returns the error from
+   re-reading the node, if any.  */
+error_t
+diskfs_node_reload (struct node *node)
+{
+  struct cluster_chain *last = node->dn->first;
+
+  while (last)
+    {
+      struct cluster_chain *next = last->next;
+      free(last);
+      last = next;
+    }
+  /* The chain is gone; reset the bookkeeping to the state of a fresh
+     disknode so that nothing follows the freed pointers.  */
+  node->dn->first = 0;
+  node->dn->last = 0;
+  node->dn->length_of_chain = 0;
+  node->dn->chain_complete = 0;
+
+  flush_node_pager (node);
+
+  return read_node (node, 0);
+}
+
+/* For each active node, call FUN.  The node is to be locked around the call
+   to FUN.  If FUN returns non-zero for any node, then immediately stop, and
+   return that value.  */
+error_t
+diskfs_node_iterate (error_t (*fun)(struct node *))
+{
+  error_t err = 0;
+  int n, num_nodes = 0;
+  struct node *node, **node_list, **p;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+
+  /* We must copy everything from the hash table into another data structure
+     to avoid running into any problems with the hash-table being modified
+     during processing (normally we delegate access to hash-table with
+     diskfs_node_refcnt_lock, but we can't hold this while locking the
+     individual node locks).  */
+
+  for (n = 0; n < INOHSZ; n++)
+    for (node = nodehash[n]; node; node = node->dn->hnext)
+      num_nodes++;
+
+  node_list = alloca (num_nodes * sizeof (struct node *));
+  p = node_list;
+  for (n = 0; n < INOHSZ; n++)
+    for (node = nodehash[n]; node; node = node->dn->hnext)
+      {
+        *p++ = node;
+        /* Keep the node alive until we have dealt with it below.  */
+        node->references++;
+      }
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+
+  p = node_list;
+  while (num_nodes-- > 0)
+    {
+      node = *p++;
+      /* After the first error FUN is no longer called, but the
+         references taken above must still be released.  */
+      if (!err)
+        {
+          mutex_lock (&node->lock);
+          err = (*fun)(node);
+          mutex_unlock (&node->lock);
+        }
+      diskfs_nrele (node);
+    }
+
+  return err;
+}
+
+/* Write all active disknodes back to their directory entries.
*/
+void
+write_all_disknodes ()
+{
+  /* Bring the times of NODE up to date and write it out.  Always
+     returns 0, so the iteration never stops early.  */
+  error_t write_one_disknode (struct node *node)
+    {
+      diskfs_set_node_times (node);
+
+      /* Update the inode image.  */
+      write_node (node);
+
+      return 0;
+    }
+
+  diskfs_node_iterate (write_one_disknode);
+}
+
+
+/* Store the current FS_UID and FS_GID into the stat information of
+   every active node.  */
+void
+refresh_node_stats ()
+{
+  error_t refresh_one_node_stat (struct node *node)
+    {
+      node->dn_stat.st_uid = fs_uid;
+      node->dn_stat.st_gid = fs_gid;
+      return 0;
+    }
+
+  diskfs_node_iterate (refresh_one_node_stat);
+}
+
+
+/* Sync the info in NP->dn_stat and any associated format-specific
+   information to disk.  If WAIT is true, then return only after the
+   physical media has been completely updated.  */
+void
+diskfs_write_disknode (struct node *np, int wait)
+{
+  write_node (np);
+}
+
+/* Set *ST with appropriate values to reflect the current state of the
+   filesystem.  */
+error_t
+diskfs_set_statfs (struct statfs *st)
+{
+  st->f_type = FSTYPE_MSLOSS;
+  st->f_bsize = bytes_per_sector;
+  st->f_blocks = total_sectors;
+  st->f_bfree = fat_get_freespace () * sectors_per_cluster;
+  st->f_bavail = st->f_bfree;
+  /* There is no easy way to determine the number of (free) files on a
+     FAT filesystem.  */
+  st->f_files = 0;
+  st->f_ffree = 0;
+  st->f_fsid = getpid ();
+  st->f_namelen = 0;
+  st->f_favail = st->f_ffree;
+  st->f_frsize = bytes_per_cluster;
+  return 0;
+}
+
+/* Translators cannot be stored on this filesystem.  */
+error_t
+diskfs_set_translator (struct node *node,
+                       const char *name, u_int namelen,
+                       struct protid *cred)
+{
+  assert (!diskfs_readonly);
+  return EOPNOTSUPP;
+}
+
+/* No node ever has a stored translator, so this should not be called.
+   Return an error rather than running off the end of the function
+   when assertions are compiled out.  */
+error_t
+diskfs_get_translator (struct node *node, char **namep, u_int *namelen)
+{
+  assert(0);
+  return EOPNOTSUPP;
+}
+
+void
+diskfs_shutdown_soft_ports ()
+{
+  /* Should initiate termination of internally held pager ports
+     (the only things that should be soft) XXX */
+}
+
+/* The user must define this function.  Truncate locked node NODE to be SIZE
+   bytes long.  (If NODE is already less than or equal to SIZE bytes
+   long, do nothing.)
If this is a symlink (and diskfs_shortcut_symlink
+   is set) then this should clear the symlink, even if
+   diskfs_create_symlink_hook stores the link target elsewhere.  */
+error_t
+diskfs_truncate (struct node *node, loff_t length)
+{
+  error_t err;
+  loff_t offset;
+
+  diskfs_check_readonly ();
+  assert (!diskfs_readonly);
+
+  /* Truncation never grows a file.  */
+  if (length >= node->dn_stat.st_size)
+    return 0;
+
+  /* If the file is not being truncated to a cluster boundary, the
+     contents of the partial cluster following the end of the file
+     must be zeroed in case it ever becomes accessible again because
+     of subsequent file growth.  */
+  offset = length & (bytes_per_cluster - 1);
+  if (offset > 0)
+    {
+      /* Overwrite the tail of the last kept cluster with zeroes taken
+         from ZEROCLUSTER.  */
+      diskfs_node_rdwr (node, (void *)zerocluster, length, bytes_per_cluster - offset,
+                        1, 0, 0);
+      diskfs_file_update (node, 1);
+    }
+
+  rwlock_writer_lock (&node->dn->alloc_lock);
+
+  /* Update the size on disk; if we crash, we'll lose.  */
+  node->dn_stat.st_size = length;
+  node->dn_set_mtime = 1;
+  node->dn_set_ctime = 1;
+  diskfs_node_update (node, 1);
+
+  err = diskfs_catch_exception ();
+  if (!err)
+    {
+      /* Cut the cluster chain down to the number of clusters still
+         needed and record the new allocation size.  */
+      fat_truncate_node(node, round_cluster(length) >> log2_bytes_per_cluster);
+      node->allocsize = round_cluster(length);
+    }
+  diskfs_end_catch_exception ();
+
+  node->dn_set_mtime = 1;
+  node->dn_set_ctime = 1;
+  node->dn_stat_dirty = 1;
+
+  rwlock_writer_unlock (&node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Reporting storage information for a file is not implemented;
+   always returns EOPNOTSUPP.  */
+error_t
+diskfs_S_file_get_storage_info (struct protid *cred,
+                                mach_port_t **ports,
+                                mach_msg_type_name_t *ports_type,
+                                mach_msg_type_number_t *num_ports,
+                                int **ints, mach_msg_type_number_t *num_ints,
+                                loff_t **offsets,
+                                mach_msg_type_number_t *num_offsets,
+                                char **data, mach_msg_type_number_t *data_len)
+{
+  /* XXX */
+  return EOPNOTSUPP;
+}
+
+/* Free node NP; the on disk copy has already been synced with
+   diskfs_node_update (where NP->dn_stat.st_mode was 0).  Its
+   mode used to be OLD_MODE.
*/
+void
+diskfs_free_node (struct node *np, mode_t old_mode)
+{
+  assert (!diskfs_readonly);
+
+  /* Hand the virtual inode of NP back to the inode table.  */
+  vi_free(np->dn->inode);
+}
+
+/* The user must define this function.  Allocate a new node to be of
+   mode MODE in locked directory DP (don't actually set the mode or
+   modify the dir, that will be done by the caller); the user
+   responsible for the request can be identified with CRED.  Set *NP
+   to be the newly allocated node.  */
+error_t
+diskfs_alloc_node (struct node *dir, mode_t mode, struct node **node)
+{
+  error_t err;
+  ino_t inum;
+  inode_t inode;
+  struct node *np;
+
+  assert (!diskfs_readonly);
+
+  /* Reserve a virtual inode first, then build the node for it; *NODE
+     is only stored when both steps succeeded.  */
+  err = vi_new((struct vi_key) {0,1} /* XXX not allocated yet */, &inum, &inode);
+  if (!err)
+    err = diskfs_cached_lookup (inum, &np);
+  if (!err)
+    *node = np;
+
+  return err;
+}
diff --git a/fatfs/main.c b/fatfs/main.c
new file mode 100644
index 00000000..800e58b4
--- /dev/null
+++ b/fatfs/main.c
@@ -0,0 +1,265 @@
+/* main.c - FAT filesystem.
+   Copyright (C) 1997, 1998, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*/
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "fatfs.h"
+
+struct node *diskfs_root_node;
+
+struct store *store = 0;
+struct store_parsed *store_parsed = 0;
+char *diskfs_disk_name = 0;
+
+char *diskfs_server_name = "fatfs";
+char *diskfs_server_version = HURD_VERSION;
+char *diskfs_extra_version = "GNU Hurd";
+int diskfs_synchronous = 0;
+
+int diskfs_link_max = 1;
+int diskfs_name_max = FAT_NAME_MAX;
+int diskfs_maxsymlinks = 8;     /* XXX */
+
+/* This filesystem is not capable of writing yet.  */
+int diskfs_readonly = 1, diskfs_hard_readonly = 1;
+
+/* Handy source of zeroes.  */
+vm_address_t zerocluster;
+
+/* Directory entry made up for the root directory (filled in by
+   fetch_root).  */
+struct dirrect dr_root_node;
+
+/* The UID and GID for all files in the filesystem.  */
+uid_t default_fs_uid;
+gid_t default_fs_gid;
+uid_t fs_uid;
+gid_t fs_gid;
+
+/* fatfs specific options.  */
+static const struct argp_option options[] =
+  {
+    { "uid", 'U', "uid", 0, "Default uid for files" },
+    { "gid", 'G', "gid", 0, "Default gid for files" },
+    { 0 }
+  };
+
+/* Argp parser for the fatfs specific options.  'U' and 'G' set FS_UID
+   and FS_GID from ARG (converted with atoi, so a non-numeric argument
+   yields 0) and then refresh the stat information of all active
+   nodes.  Returns ARGP_ERR_UNKNOWN for keys not handled here.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  switch (key)
+    {
+    case 'U':
+      if (arg)
+        fs_uid = atoi (arg);
+      refresh_node_stats ();
+      break;
+    case 'G':
+      if (arg)
+        fs_gid = atoi (arg);
+      refresh_node_stats ();
+      break;
+    case ARGP_KEY_INIT:
+      /* Pass our input on to the first child parser.  */
+      state->child_inputs[0] = state->input;
+      break;
+    case ARGP_KEY_SUCCESS:
+      break;
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+
+  return 0;
+}
+
+/* Add our startup arguments to the standard diskfs set.  */
+static const struct argp_child startup_children[] =
+  { { &diskfs_store_startup_argp }, { 0 } };
+static struct argp startup_argp =
+  { options, parse_opt, 0, 0, startup_children };
+
+/* Similarly at runtime.
*/
+static const struct argp_child runtime_children[] =
+  { { &diskfs_std_runtime_argp }, { 0 } };
+static struct argp runtime_argp =
+  { options, parse_opt, 0, 0, runtime_children };
+
+struct argp *diskfs_runtime_argp = (struct argp *) &runtime_argp;
+
+
+/* Override the standard diskfs routine so we can add our own
+   output.  The --uid and --gid options are only appended when they
+   differ from the defaults determined at startup.  */
+error_t
+diskfs_append_args (char **argz, unsigned *argz_len)
+{
+  error_t err;
+  char buf[100];
+
+  /* Get the standard things.  */
+  err = diskfs_append_std_options (argz, argz_len);
+
+  if (!err && fs_uid != default_fs_uid)
+    {
+      snprintf (buf, sizeof buf, "--uid=%d", fs_uid);
+      err = argz_add (argz, argz_len, buf);
+    }
+
+  if (!err && fs_gid != default_fs_gid)
+    {
+      snprintf (buf, sizeof buf, "--gid=%d", fs_gid);
+      err = argz_add (argz, argz_len, buf);
+    }
+
+  if (! err)
+    err = store_parsed_append_args (store_parsed, argz, argz_len);
+
+  return err;
+}
+
+
+/* Fetch the root node.  Builds the made-up directory entry
+   DR_ROOT_NODE, registers it with the virtual inode table and stores
+   the resulting node, unlocked, in DISKFS_ROOT_NODE.  Any failure is
+   fatal.  */
+static void
+fetch_root ()
+{
+  error_t err;
+  ino_t inum;
+  inode_t inode;
+
+  memset (&dr_root_node, 0, sizeof(struct dirrect));
+
+  /* Fill root directory entry.  XXX Should partially be in fat.c  */
+  dr_root_node.attribute = FAT_DIR_ATTR_DIR;
+  /* FAT12/16: There is no such thing as a start cluster, because
+     the whole root dir is in a special region after the FAT.  The
+     start cluster of the root node is undefined (left zero here).
+     FAT32: The root directory is an ordinary cluster chain whose
+     start cluster is recorded in the superblock.  */
+  if (fat_type == FAT32)
+    {
+      dr_root_node.first_cluster_high[1]
+        = sblock->compat.fat32.root_cluster[3];
+      dr_root_node.first_cluster_high[0]
+        = sblock->compat.fat32.root_cluster[2];
+      dr_root_node.first_cluster_low[1] = sblock->compat.fat32.root_cluster[1];
+      dr_root_node.first_cluster_low[0] = sblock->compat.fat32.root_cluster[0];
+    }
+
+  /* Determine size of the directory (different for fat12/16 vs 32).  */
+  switch (fat_type)
+    {
+    case FAT12:
+    case FAT16:
+      write_dword(dr_root_node.file_size, nr_of_root_dir_sectors
+                  << log2_bytes_per_sector);
+      break;
+
+    case FAT32:
+      {
+        /* Extend the cluster chain of the root directory and calculate
+           file_size based on that.  */
+        cluster_t rootdir;
+        int cs = 0;
+
+        /* ROOT_CLUSTER is a four byte field; decode all of it rather
+           than just its first byte.  */
+        rootdir = read_dword (sblock->compat.fat32.root_cluster);
+        while (rootdir != FAT_EOC)
+          {
+            /* A failed lookup would leave ROOTDIR unchanged and loop
+               forever, so treat it as fatal like the other errors
+               below.  */
+            err = fat_get_next_cluster (rootdir, &rootdir);
+            assert_perror (err);
+            cs++;
+          }
+        write_dword (dr_root_node.file_size, cs << log2_bytes_per_cluster);
+      }
+      break;
+
+    default:
+      assert(!"don't know how to set size of root dir");
+    };
+
+  /* The magic vi_key {0, 1} for the root directory is distinguished
+     from the vi_zero_key (in the dir_offset value) as well as all
+     normal virtual inode keys (in the dir_inode value).  Enter the
+     disknode into the inode table.  */
+  err = vi_new ((struct vi_key) {0, 1}, &inum, &inode);
+  assert_perror (err);
+
+  /* Allocate a node for the root directory disknode in
+     diskfs_root_node.  */
+  if (!err)
+    err = diskfs_cached_lookup (inum, &diskfs_root_node);
+
+  assert_perror (err);
+
+  /* diskfs_cached_lookup returned the node locked.  */
+  mutex_unlock (&diskfs_root_node->lock);
+}
+
+
+int
+main (int argc, char **argv)
+{
+  mach_port_t bootstrap;
+
+  default_fs_uid = getuid ();
+  default_fs_gid = getgid ();
+  fs_uid = default_fs_uid;
+  fs_gid = default_fs_gid;
+
+  /* Initialize the diskfs library, parse arguments, and open the
+     store.  This starts the first diskfs thread for us.  */
+  store = diskfs_init_main (&startup_argp, argc, argv, &store_parsed,
+                            &bootstrap);
+
+  fat_read_sblock ();
+
+  create_fat_pager ();
+
+  /* One cluster worth of anonymous memory, used as a source of
+     zeroes.  */
+  zerocluster = (vm_address_t) mmap (0, bytes_per_cluster, PROT_READ|PROT_WRITE,
+                                     MAP_ANON, 0, 0);
+
+  fetch_root ();
+
+  diskfs_startup_diskfs (bootstrap, 0);
+
+  cthread_exit (0);
+
+  return 0;
+}
+
+
+/* Nothing to do for read-only medium.
*/
+error_t
+diskfs_reload_global_state ()
+{
+  return 0;
+}
+
+
+/* There is no global metadata to write out here; always succeeds.  */
+error_t
+diskfs_set_hypermetadata (int wait, int clean)
+{
+  return 0;
+}
+
+
+void
+diskfs_readonly_changed (int readonly)
+{
+  /* We should never get here because we set diskfs_hard_readonly above.  */
+  abort ();
+}
+
diff --git a/fatfs/pager.c b/fatfs/pager.c
new file mode 100644
index 00000000..606dc4d8
--- /dev/null
+++ b/fatfs/pager.c
@@ -0,0 +1,1019 @@
+/* pager.c - Pager for fatfs.
+   Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.  */
+
+#include
+#include
+#include "fatfs.h"
+
+/* A ports bucket to hold pager ports.  */
+struct port_bucket *pager_bucket;
+
+/* Mapped image of the FAT.  */
+void *fat_image;
+
+spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
+
+#ifdef DONT_CACHE_MEMORY_OBJECTS
+#define MAY_CACHE 0
+#else
+#define MAY_CACHE 1
+#endif
+
+/* Statistics are not collected; the macro expands to a no-op.  */
+#define STAT_INC(field) /* nop */0
+
+/* Upper bound on the number of pages kept on the free list below.  */
+#define MAX_FREE_PAGE_BUFS 32
+
+/* Singly linked list of unused page buffers; the first word of each
+   buffer points to the next one.  Protected by FREE_PAGE_BUFS_LOCK.  */
+static spin_lock_t free_page_bufs_lock = SPIN_LOCK_INITIALIZER;
+static void *free_page_bufs = 0;
+static int num_free_page_bufs = 0;
+
+/* Returns a single page page-aligned buffer.
*/
+static void *
+get_page_buf ()
+{
+  void *page;
+
+  spin_lock (&free_page_bufs_lock);
+  page = free_page_bufs;
+  if (page != 0)
+    {
+      /* Pop the head of the free list; its first word links to the
+         next free buffer.  */
+      free_page_bufs = *(void **)page;
+      num_free_page_bufs--;
+      spin_unlock (&free_page_bufs_lock);
+      return page;
+    }
+  spin_unlock (&free_page_bufs_lock);
+
+  /* The pool is empty, so get a fresh page; 0 is returned if that
+     fails.  */
+  page = mmap (0, vm_page_size, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+  return page == (void *) -1 ? 0 : page;
+}
+
+/* Gives back a buffer obtained from get_page_buf.  It is kept for
+   reuse unless the pool already holds MAX_FREE_PAGE_BUFS pages, in
+   which case it is unmapped.  */
+static void
+free_page_buf (void *buf)
+{
+  int pooled;
+
+  spin_lock (&free_page_bufs_lock);
+  pooled = num_free_page_bufs < MAX_FREE_PAGE_BUFS;
+  if (pooled)
+    {
+      *(void **)buf = free_page_bufs;
+      free_page_bufs = buf;
+      num_free_page_bufs++;
+    }
+  spin_unlock (&free_page_bufs_lock);
+
+  if (!pooled)
+    munmap (buf, vm_page_size);
+}
+
+/* Look up the disk cluster that backs byte OFFSET of NODE and store it
+   in *CLUSTER.  When *LOCK is 0 on entry, a reader lock on NODE's
+   ALLOC_LOCK is taken first and *LOCK is set to it; the lock stays
+   held on return, whether or not an error occurred.  Returns EIO if
+   the cluster at OFFSET extends beyond NODE's allocated size,
+   otherwise the result of fat_getcluster.  */
+static error_t
+find_cluster (struct node *node, vm_offset_t offset,
+              cluster_t *cluster, struct rwlock **lock)
+{
+  if (!*lock)
+    {
+      *lock = &node->dn->alloc_lock;
+      rwlock_reader_lock (*lock);
+    }
+
+  if (offset + bytes_per_cluster > node->allocsize)
+    return EIO;
+
+  return fat_getcluster (node, offset >> log2_bytes_per_cluster, 0, cluster);
+}
+
+/* Read one page for the root dir pager at offset PAGE, into BUF.  This
+   may need to select several filesystem sectors to satisfy one page.
+   Assumes that fat_type is FAT12 or FAT16, and that vm_page_size is a
+   power of two multiple of bytes_per_sector (which happens to be true).
+*/
+static error_t
+root_dir_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+{
+  error_t err;
+  daddr_t addr;
+  int overrun = 0;
+  size_t read = 0;
+
+  *writelock = 0;
+
+  if (page >= diskfs_root_node->allocsize)
+    {
+      return EIO;
+    }
+
+  rwlock_reader_lock(&diskfs_root_node->dn->alloc_lock);
+
+  addr = first_root_dir_byte + page;
+  /* Number of bytes of this page that lie beyond the end of the root
+     directory.  */
+  if (page + vm_page_size > diskfs_root_node->allocsize)
+    overrun = page + vm_page_size - diskfs_root_node->allocsize;
+
+  err = store_read (store, addr >> store->log2_block_size,
+                    vm_page_size, (void **) buf, &read);
+  if (!err && read != vm_page_size)
+    err = EIO;
+
+  rwlock_reader_unlock (&diskfs_root_node->dn->alloc_lock);
+
+  /* Zero the part behind the directory, but only if the read gave us
+     a complete buffer to work on.  */
+  if (!err && overrun)
+    bzero ((void *) *buf + vm_page_size - overrun, overrun);
+
+  return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into BUF.  This
+   may need to select only a part of a filesystem block to satisfy one page.
+   Assumes that bytes_per_cluster is a power of two multiple of vm_page_size.
+*/
+static error_t
+file_pager_read_small_page (struct node *node, vm_offset_t page,
+                            void **buf, int *writelock)
+{
+  error_t err;
+  struct rwlock *lock = NULL;
+  cluster_t cluster;
+  size_t read = 0;
+
+  *writelock = 0;
+
+  if (page >= node->allocsize)
+    {
+      return EIO;
+    }
+
+  /* This takes a reader lock on the allocation info and leaves it in
+     LOCK, even on failure.  */
+  err = find_cluster (node, page, &cluster, &lock);
+
+  if (!err)
+    {
+      err = store_read (store,
+                        (fat_first_cluster_byte(cluster)
+                         + (page % bytes_per_cluster)) >> store->log2_block_size,
+                        vm_page_size, (void **) buf, &read);
+      /* Report a short read as EIO, but do not hide the real error
+         of a failed read.  */
+      if (!err && read != vm_page_size)
+        err = EIO;
+    }
+
+  if (lock)
+    rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into BUF.  This
+   may need to read several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.
+   Assumes that vm_page_size is a power of two multiple of bytes_per_cluster.
+*/
+static error_t
+file_pager_read_huge_page (struct node *node, vm_offset_t page,
+                           void **buf, int *writelock)
+{
+  error_t err = 0;
+  int offs = 0;
+  struct rwlock *lock = NULL;
+  int left = vm_page_size;
+  cluster_t pending_clusters = 0;
+  int num_pending_clusters = 0;
+
+  /* Read the NUM_PENDING_CLUSTERS cluster in PENDING_CLUSTERS, into the buffer
+     pointed to by BUF (allocating it if necessary) at offset OFFS.  OFFS in
+     adjusted by the amount read, and NUM_PENDING_CLUSTERS is zeroed.  Any read
+     error is returned.  */
+  error_t do_pending_reads ()
+    {
+      if (num_pending_clusters > 0)
+        {
+          size_t dev_block = fat_first_cluster_byte(pending_clusters) >> store->log2_block_size;
+          size_t amount = num_pending_clusters << log2_bytes_per_cluster;
+          /* The buffer we try to read into; on the first read, we pass in a
+             size of zero, so that the read is guaranteed to allocate a new
+             buffer, otherwise, we try to read directly into the tail of the
+             buffer we've already got.  */
+          void *new_buf = *buf + offs;
+          size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+
+          STAT_INC (file_pagein_reads);
+
+          err = store_read (store, dev_block, amount, &new_buf, &new_len);
+          if (err)
+            return err;
+          else if (amount != new_len)
+            return EIO;
+
+          if (new_buf != *buf + offs)
+            {
+              /* The read went into a different buffer than the one we
+                 passed.  */
+              if (offs == 0)
+                /* First read, make the returned page be our buffer.  */
+                *buf = new_buf;
+              else
+                /* We've already got some buffer, so copy into it.  */
+                {
+                  memcpy (*buf + offs, new_buf, new_len);
+                  free_page_buf (new_buf); /* Return NEW_BUF to our pool.  */
+                  STAT_INC (file_pagein_freed_bufs);
+                }
+            }
+
+          offs += new_len;
+          num_pending_clusters = 0;
+        }
+
+      return 0;
+    }
+
+  STAT_INC (file_pageins);
+
+  *writelock = 0;
+
+  if (page >= node->allocsize)
+    {
+      err = EIO;
+      left = 0;
+    }
+  else if (page + left > node->allocsize)
+    left = node->allocsize - page;
+
+  while (left > 0)
+    {
+      cluster_t cluster;
+
+      err = find_cluster (node, page, &cluster, &lock);
+      if (err)
+        break;
+
+      /* Clusters that follow each other on disk are collected and
+         read in one go.  */
+      if (cluster != pending_clusters + num_pending_clusters)
+        {
+          err = do_pending_reads ();
+          if (err)
+            break;
+          pending_clusters = cluster;
+        }
+
+      num_pending_clusters++;
+
+      page += bytes_per_cluster;
+      left -= bytes_per_cluster;
+    }
+
+  if (!err && num_pending_clusters > 0)
+    err = do_pending_reads();
+
+  if (lock)
+    rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+struct pending_clusters
+  {
+    /* The cluster number of the first of the clusters.  */
+    cluster_t cluster;
+    /* How many clusters we have.  */
+    loff_t num;
+    /* A (page-aligned) buffer pointing to the data we're dealing with.  */
+    void *buf;
+    /* And an offset into BUF.  */
+    int offs;
+};
+
+/* Write any pending clusters in PC to the store and advance PC's
+   buffer offset past them.  Returns 0 on success (also when nothing
+   was pending), ENOMEM if no bounce buffer could be obtained, the
+   error from store_write, or EIO on a short write.  */
+static error_t
+pending_clusters_write (struct pending_clusters *pc)
+{
+  if (pc->num > 0)
+    {
+      error_t err;
+      size_t dev_block = fat_first_cluster_byte(pc->cluster) >> store->log2_block_size;
+      size_t length = pc->num << log2_bytes_per_cluster, amount;
+
+      if (pc->offs > 0)
+        /* Put what we're going to write into a page-aligned buffer.  */
+        {
+          void *page_buf = get_page_buf ();
+          /* get_page_buf returns 0 when no page can be mapped.  */
+          if (page_buf == 0)
+            return ENOMEM;
+          memcpy ((void *) page_buf, pc->buf + pc->offs, length);
+          err = store_write (store, dev_block, page_buf, length, &amount);
+          free_page_buf (page_buf);
+        }
+      else
+        err = store_write (store, dev_block, pc->buf, length, &amount);
+      if (err)
+        return err;
+      else if (amount != length)
+        return EIO;
+
+      pc->offs += length;
+      pc->num = 0;
+    }
+
+  return 0;
+}
+
+/* Set up PC for writing out the data in BUF, with no clusters pending
+   yet.  */
+static void
+pending_clusters_init (struct pending_clusters *pc, void *buf)
+{
+  pc->buf = buf;
+  pc->cluster = 0;
+  pc->num = 0;
+  pc->offs = 0;
+}
+
+/* Add the disk cluster CLUSTER to the list of destination disk clusters pending in
+   PC.  If CLUSTER does not directly follow the pending ones on disk,
+   those are written out first; an error from that write is returned
+   and CLUSTER is not added.  */
+static error_t
+pending_clusters_add (struct pending_clusters *pc, cluster_t cluster)
+{
+  if (cluster != pc->cluster + pc->num)
+    {
+      error_t err = pending_clusters_write (pc);
+      if (err)
+        return err;
+      pc->cluster = cluster;
+    }
+  pc->num++;
+  return 0;
+}
+
+/* Write one page for the pager backing NODE, at offset PAGE, into BUF.  This
+   may need to write several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.
+   Assumes that vm_page_size is a power of two multiple of bytes_per_cluster.
+   Returns the first error met while looking up or writing clusters.
+*/
+static error_t
+file_pager_write_huge_page (struct node *node, vm_offset_t offset, void *buf)
+{
+  error_t err = 0;
+  struct pending_clusters pc;
+  /* Non-null, so that find_cluster does not take the lock again.  */
+  struct rwlock *lock = &node->dn->alloc_lock;
+  cluster_t cluster;
+  int left = vm_page_size;
+
+  pending_clusters_init (&pc, buf);
+
+  /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+     at least for the cases we care about: pager_unlock_page,
+     diskfs_grow and diskfs_truncate.  */
+  rwlock_reader_lock (&node->dn->alloc_lock);
+
+  if (offset >= node->allocsize)
+    left = 0;
+  else if (offset + left > node->allocsize)
+    left = node->allocsize - offset;
+
+  STAT_INC (file_pageouts);
+
+  while (left > 0)
+    {
+      err = find_cluster (node, offset, &cluster, &lock);
+      if (err)
+        break;
+      /* Adding a cluster may flush the ones already pending; a
+         failure of that write must not get lost.  */
+      err = pending_clusters_add (&pc, cluster);
+      if (err)
+        break;
+      offset += bytes_per_cluster;
+      left -= bytes_per_cluster;
+    }
+
+  /* Flush whatever is still pending and report how that went.  */
+  if (!err)
+    err = pending_clusters_write (&pc);
+
+  rwlock_reader_unlock (&node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Write one page for the root dir pager, at offset OFFSET, into BUF.  This
+   may need to write several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.
+   Assumes that fat_type is FAT12 or FAT16 and that vm_page_size is a
+   power of two multiple of bytes_per_sector.
+*/
+static error_t
+root_dir_pager_write_page (vm_offset_t offset, void *buf)
+{
+  error_t err;
+  daddr_t addr;
+  size_t length;
+  size_t write = 0;
+
+  /* Pages entirely behind the root directory are silently dropped.  */
+  if (offset >= diskfs_root_node->allocsize)
+    return 0;
+
+  /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+     at least for the cases we care about: pager_unlock_page,
+     diskfs_grow and diskfs_truncate.  */
+  rwlock_reader_lock (&diskfs_root_node->dn->alloc_lock);
+
+  addr = first_root_dir_byte + offset;
+
+  /* Do not write beyond the end of the root directory.  */
+  if (offset + vm_page_size > diskfs_root_node->allocsize)
+    length = diskfs_root_node->allocsize - offset;
+  else
+    length = vm_page_size;
+
+  err = store_write (store, addr >> store->log2_block_size, buf,
+                     length, &write);
+  if (!err && write != length)
+    err = EIO;
+
+  rwlock_reader_unlock (&diskfs_root_node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Write one page for the pager backing NODE, at offset OFFSET, into BUF.  This
+   may need to write several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.
+   Assumes that bytes_per_cluster is a power of two multiple of vm_page_size.
+*/ +static error_t +file_pager_write_small_page (struct node *node, vm_offset_t offset, void *buf) +{ + error_t err; + struct rwlock *lock = NULL; + cluster_t cluster; + size_t write = 0; + + if (offset >= node->allocsize) + return 0; + + /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize, + at least for the cases we care about: pager_unlock_page, + diskfs_grow and diskfs_truncate. */ + rwlock_reader_lock (&node->dn->alloc_lock); + + err = find_cluster (node, offset, &cluster, &lock); + + if (!err) + { + err = store_write (store, + (fat_first_cluster_byte(cluster) + + (offset % bytes_per_cluster)) >> store->log2_block_size, + (void **) buf, vm_page_size, &write); + if (write != vm_page_size) + err = EIO; + } + + if (lock) + rwlock_reader_unlock (lock); + + return err; +} + +static error_t +fat_pager_read_page (vm_offset_t page, void **buf, int *writelock) +{ + error_t err; + size_t length = vm_page_size, read = 0; + vm_size_t fat_end = bytes_per_sector * sectors_per_fat; + + if (page + vm_page_size > fat_end) + length = fat_end - page; + + page += first_fat_sector * bytes_per_sector; + err = store_read (store, page >> store->log2_block_size, length, buf, &read); + if (read != length) + return EIO; + if (!err && length != vm_page_size) + memset ((void *)(*buf + length), 0, vm_page_size - length); + + *writelock = 0; + + return err; +} + +static error_t +fat_pager_write_page (vm_offset_t page, void *buf) +{ + error_t err = 0; + size_t length = vm_page_size, amount; + vm_size_t fat_end = bytes_per_sector * sectors_per_fat; + + if (page + vm_page_size > fat_end) + length = fat_end - page; + + page += first_fat_sector * bytes_per_sector; + err = store_write (store, page >> store->log2_block_size, + buf, length, &amount); + if (!err && length != amount) + err = EIO; + + return err; +} + +/* Satisfy a pager read request for either the disk pager or file pager + PAGER, to the page at offset PAGE into BUF. 
WRITELOCK should be set if + the pager should make the page writeable. */ +error_t +pager_read_page (struct user_pager_info *pager, vm_offset_t page, + vm_address_t *buf, int *writelock) +{ + if (pager->type == FAT) + return fat_pager_read_page (page, (void **)buf, writelock); + else + { + if (pager->node == diskfs_root_node + && (fat_type == FAT12 || fat_type == FAT16)) + return root_dir_pager_read_page (page, (void **)buf, writelock); + else + { + if (bytes_per_cluster < vm_page_size) + return file_pager_read_huge_page (pager->node, page, + (void **)buf, writelock); + else + return file_pager_read_small_page (pager->node, page, + (void **)buf, writelock); + } + } +} + +/* Satisfy a pager write request for either the disk pager or file pager + PAGER, from the page at offset PAGE from BUF. */ +error_t +pager_write_page (struct user_pager_info *pager, vm_offset_t page, + vm_address_t buf) +{ + if (pager->type == FAT) + return fat_pager_write_page (page, (void *)buf); + else + { + if (pager->node == diskfs_root_node + && (fat_type == FAT12 || fat_type == FAT16)) + return root_dir_pager_write_page (page, (void *)buf); + else + { + if (bytes_per_cluster < vm_page_size) + return file_pager_write_huge_page (pager->node, page, + (void *)buf); + else + return file_pager_write_small_page (pager->node, page, + (void *)buf); + } + } +} + +/* Make page PAGE writable, at least up to ALLOCSIZE. */ +error_t +pager_unlock_page (struct user_pager_info *pager, + vm_offset_t page) +{ + /* All pages are writeable. The disk pages anyway, and the file + pages because blocks are directly allocated in diskfs_grow. */ + return 0; +} + +/* Grow the disk allocated to locked node NODE to be at least SIZE + bytes, and set NODE->allocsize to the actual allocated size. (If + the allocated size is already SIZE bytes, do nothing.) CRED + identifies the user responsible for the call. 
Note that this will + only be called for real files, so there is no need to be careful + about the root dir node on FAT12/16. */ +error_t +diskfs_grow (struct node *node, loff_t size, struct protid *cred) +{ + diskfs_check_readonly (); + assert (!diskfs_readonly); + + if (size > node->allocsize) + { + error_t err = 0; + loff_t old_size; + volatile loff_t new_size; + volatile cluster_t end_cluster; + cluster_t new_end_cluster; + struct disknode *dn = node->dn; + + rwlock_writer_lock (&dn->alloc_lock); + + old_size = node->allocsize; + new_size = ((size + bytes_per_cluster - 1) >> log2_bytes_per_cluster) + << log2_bytes_per_cluster; + + /* The first unallocated clusters after the old and new ends of + the file, respectively. */ + end_cluster = old_size >> log2_bytes_per_cluster; + new_end_cluster = new_size >> log2_bytes_per_cluster; + + if (new_end_cluster > end_cluster) + { + err = diskfs_catch_exception (); + while (!err && end_cluster < new_end_cluster) + { + cluster_t disk_cluster; + err = fat_getcluster (node, end_cluster++, 1, &disk_cluster); + } + diskfs_end_catch_exception (); + + if (err) + /* Reflect how much we allocated successfully. */ + new_size = (end_cluster - 1) >> log2_bytes_per_cluster; + } + + STAT_INC (file_grows); + + node->allocsize = new_size; + + rwlock_writer_unlock (&dn->alloc_lock); + + return err; + } + else + return 0; +} + +/* This syncs a single file (NODE) to disk. Wait for all I/O to + complete if WAIT is set. NODE->lock must be held. */ +void +diskfs_file_update (struct node *node, int wait) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + pager_sync (pager, wait); + ports_port_deref (pager); + } + + diskfs_node_update (node, wait); +} + +/* Invalidate any pager data associated with NODE. 
*/ +void +flush_node_pager (struct node *node) +{ + struct pager *pager; + struct disknode *dn = node->dn; + + spin_lock (&node_to_page_lock); + pager = dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (pager) + { + pager_flush (pager, 1); + ports_port_deref (pager); + } +} + +/* Return in *OFFSET and *SIZE the minimum valid address the pager + will accept and the size of the object. */ +inline error_t +pager_report_extent (struct user_pager_info *pager, + vm_address_t *offset, vm_size_t *size) +{ + assert (pager->type == FAT || pager->type == FILE_DATA); + + *offset = 0; + + if (pager->type == FAT) + *size = bytes_per_sector * sectors_per_fat; + else + *size = pager->node->allocsize; + + return 0; +} + +/* This is called when a pager is being deallocated after all extant + send rights have been destroyed. */ +void +pager_clear_user_data (struct user_pager_info *upi) +{ + if (upi->type == FILE_DATA) + { + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = upi->node->dn->pager; + if (pager && pager_get_upi (pager) == upi) + upi->node->dn->pager = 0; + spin_unlock (&node_to_page_lock); + + diskfs_nrele_light (upi->node); + } + + free (upi); +} + +/* This will be called when the ports library wants to drop weak + references. The pager library creates no weak references itself. + If the user doesn't either, then it's OK for this function to do + nothing. */ +void +pager_dropweak (struct user_pager_info *p __attribute__ ((unused))) +{ +} + +/* Create the disk pager. */ +void +create_fat_pager (void) +{ + struct user_pager_info *upi = malloc (sizeof (struct user_pager_info)); + upi->type = FAT; + pager_bucket = ports_create_bucket (); + diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, + bytes_per_sector * sectors_per_fat, + &fat_image); +} + +/* Call this to create a FILE_DATA pager and return a send right. + NODE must be locked. 
*/ +mach_port_t +diskfs_get_filemap (struct node *node, vm_prot_t prot) +{ + mach_port_t right; + + assert (S_ISDIR (node->dn_stat.st_mode) + || S_ISREG (node->dn_stat.st_mode) + || (S_ISLNK (node->dn_stat.st_mode))); + + spin_lock (&node_to_page_lock); + do + { + struct pager *pager = node->dn->pager; + if (pager) + { + /* Because PAGER is not a real reference, this might be + nearly deallocated. If that's so, then the port right + will be null. In that case, clear here and loop. The + deallocation will complete separately. */ + right = pager_get_port (pager); + if (right == MACH_PORT_NULL) + node->dn->pager = 0; + else + pager_get_upi (pager)->max_prot |= prot; + } + else + { + struct user_pager_info *upi = + malloc (sizeof (struct user_pager_info)); + upi->type = FILE_DATA; + upi->node = node; + upi->max_prot = 0; + diskfs_nref_light (node); + node->dn->pager = + pager_create (upi, pager_bucket, MAY_CACHE, + MEMORY_OBJECT_COPY_DELAY); + if (node->dn->pager == 0) + { + diskfs_nrele_light (node); + free (upi); + spin_unlock (&node_to_page_lock); + return MACH_PORT_NULL; + } + + right = pager_get_port (node->dn->pager); + ports_port_deref (node->dn->pager); + } + } + while (right == MACH_PORT_NULL); + spin_unlock (&node_to_page_lock); + + mach_port_insert_right (mach_task_self (), right, right, + MACH_MSG_TYPE_MAKE_SEND); + + return right; +} + +/* Call this when we should turn off caching so that unused memory + object ports get freed. */ +void +drop_pager_softrefs (struct node *node) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (MAY_CACHE && pager) + pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0); + if (pager) + ports_port_deref (pager); +} + +/* Call this when we should turn on caching because it's no longer + important for unused memory object ports to get freed. 
*/ +void +allow_pager_softrefs (struct node *node) +{ + struct pager *pager; + + spin_lock (&node_to_page_lock); + pager = node->dn->pager; + if (pager) + ports_port_ref (pager); + spin_unlock (&node_to_page_lock); + + if (MAY_CACHE && pager) + pager_change_attributes (pager, 1, MEMORY_OBJECT_COPY_DELAY, 0); + if (pager) + ports_port_deref (pager); +} + +/* Call this to find out the struct pager * corresponding to the + FILE_DATA pager of inode IP. This should be used *only* as a + subsequent argument to register_memory_fault_area, and will be + deleted when the kernel interface is fixed. NODE must be + locked. */ +struct pager * +diskfs_get_filemap_pager_struct (struct node *node) +{ + /* This is safe because pager can't be cleared; there must be an + active mapping for this to be called. */ + return node->dn->pager; +} + +/* Shutdown all the pagers (except the disk pager). */ +void +diskfs_shutdown_pager () +{ + error_t shutdown_one (void *v_p) + { + struct pager *p = v_p; + if (p != diskfs_disk_pager) + pager_shutdown (p); + return 0; + } + + write_all_disknodes (); + + ports_bucket_iterate (pager_bucket, shutdown_one); + + pager_sync (diskfs_disk_pager, 1); + + /* Despite the name of this function, we never actually shutdown the + disk pager, just make sure it's synced. */ +} + +/* Sync all the pagers. */ +void +diskfs_sync_everything (int wait) +{ + error_t sync_one (void *v_p) + { + struct pager *p = v_p; + if (p != diskfs_disk_pager) + pager_sync (p, wait); + return 0; + } + + write_all_disknodes (); + ports_bucket_iterate (pager_bucket, sync_one); + pager_sync (diskfs_disk_pager, wait); +} + +static void +disable_caching () +{ + error_t block_cache (void *arg) + { + struct pager *p = arg; + + pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1); + return 0; + } + + /* Loop through the pagers and turn off caching one by one, + synchronously. That should cause termination of each pager. 
*/ + ports_bucket_iterate (pager_bucket, block_cache); +} + +static void +enable_caching () +{ + error_t enable_cache (void *arg) + { + struct pager *p = arg; + struct user_pager_info *upi = pager_get_upi (p); + + pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0); + + /* It's possible that we didn't have caching on before, because + the user here is the only reference to the underlying node + (actually, that's quite likely inside this particular + routine), and if that node has no links. So dinkle the node + ref counting scheme here, which will cause caching to be + turned off, if that's really necessary. */ + if (upi->type == FILE_DATA) + { + diskfs_nref (upi->node); + diskfs_nrele (upi->node); + } + + return 0; + } + + ports_bucket_iterate (pager_bucket, enable_cache); +} + +/* Tell diskfs if there are pagers exported, and if none, then + prevent any new ones from showing up. */ +int +diskfs_pager_users () +{ + int npagers = ports_count_bucket (pager_bucket); + + if (npagers <= 1) + return 0; + + if (MAY_CACHE) + { + disable_caching (); + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + npagers = ports_count_bucket (pager_bucket); + if (npagers <= 1) + return 0; + + /* Darn, there are actual honest users. Turn caching back on, + and return failure. */ + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return 1; +} + +/* Return the bitwise or of the maximum prot parameter (the second arg + to diskfs_get_filemap) for all active user pagers. */ +vm_prot_t +diskfs_max_user_pager_prot () +{ + vm_prot_t max_prot = 0; + int npagers = ports_count_bucket (pager_bucket); + + if (npagers > 1) + /* More than just the disk pager. */ + { + error_t add_pager_max_prot (void *v_p) + { + struct pager *p = v_p; + struct user_pager_info *upi = pager_get_upi (p); + if (upi->type == FILE_DATA) + max_prot |= upi->max_prot; + /* Stop iterating if MAX_PROT is as filled as it is going to + get. 
*/ + return (max_prot + == (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ? 1 : 0; + } + + disable_caching (); /* Make any silly pagers go away. */ + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + ports_bucket_iterate (pager_bucket, add_pager_max_prot); + + enable_caching (); + } + + ports_enable_bucket (pager_bucket); + + return max_prot; +} diff --git a/fatfs/virt-inode.c b/fatfs/virt-inode.c new file mode 100644 index 00000000..d7c990d6 --- /dev/null +++ b/fatfs/virt-inode.c @@ -0,0 +1,235 @@ +/* Virtual Inode management routines + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Marcus Brinkmann. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +/* TODO: Improve NEW by keeping a bitmap of free inodes. + TODO: Improve RLOOKUP by keeping an open hash for keys (need to change + CHANGE and FREE, too). + TODO: Improve FREE by keeping the highest inode in use and keep it + up-to-date. When a table page can be freed, do so. */ + +#include +#include +#include +#include +#include "virt-inode.h" + +/* Each virtual inode contains the UNIQUE key it belongs to, + which must not be zero. */ + +vi_key_t vi_zero_key = {0, 0}; + +struct v_inode +{ + vi_key_t key; +}; + +/* All inodes are stored in a table by their index number - 1. 
+ Decrementing by one is necessary because inode numbers start from 1, + but our table is zero based. */ + +#define LOG2_TABLE_PAGE_SIZE 10 +#define TABLE_PAGE_SIZE (1 << LOG2_TABLE_PAGE_SIZE) + +struct table_page +{ + struct table_page *next; + + struct v_inode vi[TABLE_PAGE_SIZE]; +}; + +struct table_page *inode_table; + +spin_lock_t inode_table_lock = SPIN_LOCK_INITIALIZER; + +/* See vi_new and vi_rlookup. */ +error_t +_vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode) +{ + struct table_page *table = inode_table; + struct table_page *prev_table = 0; + int page = 0; + int offset = 0; + + while (table && memcmp(&vi_zero_key, &table->vi[offset].key, sizeof(vi_key_t))) + { + offset++; + if (offset == TABLE_PAGE_SIZE) + { + offset = 0; + page++; + prev_table = table; + table = table->next; + } + } + + if (table) + { + table->vi[offset].key = key; + /* See above for rationale of increment. */ + *inode = (page << LOG2_TABLE_PAGE_SIZE) + offset + 1; + *v_inode = &table->vi[offset]; + } + else + { + struct table_page **pagep; + + if (prev_table) + pagep = &prev_table->next; + else + pagep = &inode_table; + *pagep = (struct table_page *) malloc (sizeof (struct table_page)); + if (!*pagep) + { + return ENOSPC; + } + memset (*pagep, 0, sizeof (struct table_page)); + (*pagep)->vi[0].key = key; + /* See above for rationale of increment. */ + *inode = (page << LOG2_TABLE_PAGE_SIZE) + 1; + *v_inode = &(*pagep)->vi[0]; + } + + return 0; +} + +/* Allocate a new inode number INODE for KEY and return it as well as + the virtual inode V_INODE. Return 0 on success, otherwise an error + value (ENOSPC). */ +error_t +vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode) +{ + error_t err; + + assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t))); + + spin_lock (&inode_table_lock); + err = _vi_new(key, inode, v_inode); + spin_unlock (&inode_table_lock); + + return err; +} + +/* Get the key for virtual inode V_INODE. 
*/ +vi_key_t +vi_key(inode_t v_inode) +{ + return v_inode->key; +} + +/* Get the inode V_INODE belonging to inode number INODE. + Returns 0 if this inode number is free. */ +inode_t +vi_lookup(ino_t inode) +{ + struct table_page *table = inode_table; + /* See above for rationale of decrement. */ + int page = (inode - 1) >> LOG2_TABLE_PAGE_SIZE; + int offset = (inode - 1) & (TABLE_PAGE_SIZE - 1); + inode_t v_inode = 0; + + spin_lock (&inode_table_lock); + + while (table && page > 0) + { + page--; + table = table->next; + } + + if (table) + v_inode = &table->vi[offset]; + + spin_unlock (&inode_table_lock); + + return v_inode; +} + +/* Get the inode number and virtual inode belonging to key KEY. + Returns 0 on success and EINVAL if no inode is found for KEY and + CREATE is false. Otherwise, if CREATE is true, allocate new inode. */ +error_t +vi_rlookup(vi_key_t key, ino_t *inode, inode_t *v_inode, int create) +{ + error_t err = 0; + struct table_page *table = inode_table; + int page = 0; + int offset = 0; + + assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t))); + + spin_lock (&inode_table_lock); + + while (table && memcmp(&table->vi[offset].key, &key, sizeof (vi_key_t))) + { + offset++; + if (offset == TABLE_PAGE_SIZE) + { + offset = 0; + page++; + table = table->next; + } + } + + if (table) + { + /* See above for rationale of increment. */ + *inode = (page << LOG2_TABLE_PAGE_SIZE) + offset + 1; + *v_inode = &table->vi[offset]; + } + else + { + if (create) + err = _vi_new (key, inode, v_inode); + else + err = EINVAL; + } + + spin_unlock (&inode_table_lock); + + return err; +} + +/* Change the key of virtual inode V_INODE to KEY and return the old + key. */ +vi_key_t vi_change(inode_t v_inode, vi_key_t key) +{ + vi_key_t okey = v_inode->key; + + assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t))); + v_inode->key = key; + return okey; +} + +/* Release virtual inode V_INODE, freeing the inode number. Return + the key. 
*/ +vi_key_t vi_free(inode_t v_inode) +{ + vi_key_t key; + spin_lock (&inode_table_lock); + key = v_inode->key; + v_inode->key = vi_zero_key; + spin_unlock (&inode_table_lock); + return key; +} + + + + + + diff --git a/fatfs/virt-inode.h b/fatfs/virt-inode.h new file mode 100644 index 00000000..5b889d23 --- /dev/null +++ b/fatfs/virt-inode.h @@ -0,0 +1,69 @@ +/* virt-inode.h - Public interface for the virtual inode management routines. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Marcus Brinkmann. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#ifndef VIRT_INODE_H +#define VIRT_INODE_H + +#include +#include + +/* Define struct vi_key to match your needs. It is passed by copy, + so don't make it too huge. Equality is tested with memcmp, because + C == operator doesn't work on structs. */ + +struct vi_key +{ + ino_t dir_inode; + int dir_offset; +}; + +typedef struct vi_key vi_key_t; + +extern vi_key_t vi_zero_key; + +typedef struct v_inode *inode_t; + +/* Allocate a new inode number INODE for KEY and return it as well as + the virtual inode V_INODE. Return 0 on success, otherwise an error + value (ENOSPC). */ +error_t vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode); + +/* Get the key for virtual inode V_INODE.
*/ +vi_key_t vi_key(inode_t v_inode); + +/* Get the inode V_INODE belonging to inode number INODE. + Returns 0 if this inode number is free. */ +inode_t vi_lookup(ino_t inode); + +/* Get the inode number and virtual inode belonging to key KEY. + Returns 0 on success and EINVAL if no inode is found for KEY and + CREATE is false. Otherwise, if CREATE is true, allocate a new + inode. */ +error_t vi_rlookup(vi_key_t key, ino_t *inode, inode_t *v_inode, int create); + +/* Change the key of virtual inode V_INODE to KEY and return the old + key. */ +vi_key_t vi_change(inode_t v_inode, vi_key_t key); + +/* Release virtual inode V_INODE, freeing the inode number. Return + the key. */ +vi_key_t vi_free(inode_t v_inode); + +#endif -- cgit v1.2.3