From 8f48e6fa4324fc242af66ab0d49e467f98656f15 Mon Sep 17 00:00:00 2001
From: Marcus Brinkmann <marcus@gnu.org>
Date: Tue, 3 Dec 2002 20:52:59 +0000
Subject: Initial check-in.

---
 fatfs/ChangeLog    |  113 ++++++
 fatfs/Makefile     |   29 ++
 fatfs/dir.c        |  952 ++++++++++++++++++++++++++++++++++++++++++++++++
 fatfs/fat.c        |  744 ++++++++++++++++++++++++++++++++++++++
 fatfs/fat.h        |  403 +++++++++++++++++++++
 fatfs/fatfs.h      |  121 +++++++
 fatfs/inode.c      |  764 +++++++++++++++++++++++++++++++++++++++
 fatfs/main.c       |  265 ++++++++++++++
 fatfs/pager.c      | 1019 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fatfs/virt-inode.c |  235 ++++++++++++
 fatfs/virt-inode.h |   69 ++++
 11 files changed, 4714 insertions(+)
 create mode 100644 fatfs/ChangeLog
 create mode 100644 fatfs/Makefile
 create mode 100644 fatfs/dir.c
 create mode 100644 fatfs/fat.c
 create mode 100644 fatfs/fat.h
 create mode 100644 fatfs/fatfs.h
 create mode 100644 fatfs/inode.c
 create mode 100644 fatfs/main.c
 create mode 100644 fatfs/pager.c
 create mode 100644 fatfs/virt-inode.c
 create mode 100644 fatfs/virt-inode.h

diff --git a/fatfs/ChangeLog b/fatfs/ChangeLog
new file mode 100644
index 00000000..0de81963
--- /dev/null
+++ b/fatfs/ChangeLog
@@ -0,0 +1,113 @@
+2002-10-06  Marcus Brinkmann  <marcus@gnu.org>
+
+	* main.c (diskfs_server_version): Set to HURD_VERSION.
+
+	* dir.c (dirscanblock): Always emulate "." and ".." for the root
+	dir, even on FAT32.
+	(diskfs_get_directs): Likewise.  New variable DOTDOT.  Set EP to
+	DOT or DOTDOT when appropriate.  Implement inode number generation
+	with vi_rlookup.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* main.c (fetch_root): Implement this for FAT32.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* fatfs.h: Renamed prototype from fat_next_cluster to
+	fat_get_next_cluster.
+	* fat.c (fat_extend_chain): Replace CLUSTERS_PER_CHAIN with
+	CLUSTERS_PER_TABLE and LOG2_CLUSTERS_PER_CHAIN with
+	LOG2_CLUSTERS_PER_TABLE.
+	(fat_getcluster): Likewise.
+	(fat_truncate_node): Likewise.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* fat.c (fat_get_freespace): New function.
+	* fat.h: New prototype for fat_get_freespace.
+	* inode.c (diskfs_set_statfs): Set ST->f_bfree and ST->f_bavail.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* fatfs.h: New prototypes for fs_uid and fs_gid.
+	New prototype for refresh_node_stats.
+	* inode.c (read_node): Set ST->st_uid to fs_uid and ST->st_gid to
+	fs_gid.
+	(refresh_node_stats): New function.
+	* main.c: New variables default_fs_uid, default_fs_gid, fs_uid, fs_gid.
+	Include <argz.h>
+	(options): New variable.
+	(startup_children): Likewise.
+	(startup_argp): Likewise.
+	(runtime_children): Likewise.
+	(runtime_argp): Likewise.
+	(diskfs_runtime_argp): Likewise.
+	(parse_opt): New function.
+	(main): Set default_fs_uid, default_fs_gid, fs_uid and fs_gid.
+	Pass startup_argp to diskfs_init_main.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* main.c (read_sblock): Move to ...
+	* fat.c (read_sblock): ... here and rename to fat_read_sblock.  New
+	variable READ.  Don't use disk_image but access store directly.
+	(fat_read_sblock): Remove check for large clusters.
+	* fat.c: Include <time.h>
+	Submitted by Marco Gerards <mg@friaco.nl>.
+	
+	* fatfs.h (struct user_pager_info): Rename DISK in enum pager_type
+	to FAT.
+	(disk_image): Rename to fat_image and make extern.
+	(host_name, mounted_on): Remove cruft.
+	(sblock, dr_root_node): Make extern.
+	* pager.c (create_disk_pager): Rename to create_fat_pager.  Create
+	the pager with the type FAT and size of the FAT, not the whole
+	disk.  Use fat_image instead disk_image.
+	(pager_read_page): Replace DISK with FAT and call
+	fat_pager_read_page instead disk_pager_read_page.
+	(pager_write_page): Replace DISK with FAT and call
+	fat_pager_write_page instead disk_pager_write_page.
+	(pager_report_extent): Replace DISK with FAT and return new size.
+	(disk_pager_read_page): Rename to fat_pager_read_page.  Adjusted
+	to restrict to new size.  Use memset instead bcopy.  dev_end
+	renamed to fat_end.  Add beginning of FAT to page.
+	(disk_pager_write_page): Rename to fat_pager_write_page.  Adjusted
+	to restrict to new size.  dev_end renamed to fat_end.  Add
+	beginning to FAT to PAGE.
+	(file_pager_read_huge_page): Use memcpy instead bcopy.
+	(pending_clusters_write): Likewise.
+	* fat.c (fat_write_next_cluster): Don't add the beginning of FAT
+	to FAT_ENTRY_OFFSET.  Reflect renaming of disk_image to fat_image.
+	(fat_get_next_cluster): Likewise.
+	* main.c (main): Call fat_read_sblock, not read_block.  Call
+	create_fat_pager instead create_disk_pager, and do this only after
+	reading the superblock.
+
+	* main.c (fetch_root): Use memset, not bzero.
+	(read_sblock): Use memcpy, not bcopy.
+	* dir.c (diskfs_get_directs): Replace bcopy with memcpy.
+
+2002-04-15  Marcus Brinkmann  <marcus@gnu.org>
+
+	* dir.c (diskfs_get_directs): Count the special cased DOT and
+	DOTDOT directories for root directory nodes in FAT12 and FAT16
+	file systems.
+
+	* dir.c (dirscanblock): Determine the parent inode correctly.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* inode.c (read_node): Add VK.dir_offset to BUF before doing the
+	pointer conversion.
+	(write_node): Likewise.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+	
+	* inode.c (write_node): Save the modification time as write time.
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* fat.h: Add prototype for fat_from_epoch().
+	Submitted by Marco Gerards <mg@friaco.nl>.
+
+	* inode.c (read_node): For the root dir of a FAT12/16 file system,
+	set allocsize to size of the root directory region.
+	(read_node): For files, round up to a full cluster multiple.
+	Reported by Marco Gerards <mg@friaco.nl>.
+
+2000-05-05  Marcus Brinkmann  <marcus@gnu.org>
+
+	* Initial release.
diff --git a/fatfs/Makefile b/fatfs/Makefile
new file mode 100644
index 00000000..61b89302
--- /dev/null
+++ b/fatfs/Makefile
@@ -0,0 +1,29 @@
+#   Copyright (C) 1997 Free Software Foundation
+#   Modified by Marcus Brinkmann, 2000-05-05
+#
+#   This program is free software; you can redistribute it and/or
+#   modify it under the terms of the GNU General Public License as
+#   published by the Free Software Foundation; either version 2, or (at
+#   your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+dir := fatfs
+makemode := server
+
+target = fatfs
+SRCS = inode.c main.c dir.c pager.c fat.c virt-inode.c
+LCLHDRS = fat.h fatfs.h virt-inode.h
+DIST_FILES = EXTENSIONS
+
+OBJS = $(SRCS:.c=.o)
+HURDLIBS = diskfs iohelp fshelp store pager ports threads ihash shouldbeinlibc
+
+include ../Makeconf
diff --git a/fatfs/dir.c b/fatfs/dir.c
new file mode 100644
index 00000000..9ef76c49
--- /dev/null
+++ b/fatfs/dir.c
@@ -0,0 +1,952 @@
+/* dir.c - Directory handling for the FAT filesystem.
+   Copyright (C) 1997, 1998, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <ctype.h>
+#include <string.h>
+#include <dirent.h>
+#include "fatfs.h"
+
+/* The size of a directory block is usually just the cluster size.
+   However, the root directory of FAT12/16 file systems is stored in
+   sectors in a special region, so directories are scanned in units of
+   one sector, which evenly divides both layouts.  */
+#define DIRBLKSIZ bytes_per_sector
+#define LOG2_DIRBLKSIZ log2_bytes_per_sector
+
+enum slot_status
+{
+  /* This means we haven't yet found room for a new entry.  */
+  LOOKING,
+
+  /* This means that the specified entry is free and should be used.  */
+  TAKE,
+
+  /* This means that the specified entry has enough room at the end to
+     hold the new entry.  Unused here: FAT records have a fixed size.  */
+  SHRINK,
+
+  /* This means that there is enough space in the block, but not in
+     any one single entry, so the entries have to be shifted to make
+     room.  Not yet implemented by diskfs_direnter_hard.  */
+  COMPRESS,
+
+  /* This means that the directory will have to be grown to hold the
+     entry.  */
+  EXTEND,
+
+  /* For removal and rename, this means that this is the location
+     of the entry found.  */
+  HERE_TIS,
+};
+
+struct dirstat
+{
+  /* Type of follow-up operation expected.  */
+  enum lookup_type type;
+
+  /* One of the slot_status values above.  */
+  enum slot_status stat;
+
+  /* Address and length of the saved directory mapping (0 if none).  */
+  vm_address_t mapbuf;
+  vm_size_t mapextent;
+
+  /* Index of the DIRBLKSIZ-sized directory block concerned.  */
+  int idx;
+
+  /* For stat COMPRESS, this is the address (inside mapbuf) of the
+     first directory record in the block to be compressed.  */
+  /* For stat HERE_TIS, SHRINK, and TAKE, this is the entry referenced.  */
+  struct dirrect *entry;
+
+  /* For stat HERE_TIS, type REMOVE, this is the address of the previous
+     directory record in this block, or zero if this is the first.  */
+  struct dirrect *preventry;
+
+  /* For stat COMPRESS, this is the number of bytes needed to be copied
+     in order to undertake the compression.  */
+  size_t nbytes;
+};
+
+const size_t diskfs_dirstat_size = sizeof (struct dirstat);
+
+/* Set DS to type LOOKUP, the state that diskfs_drop_dirstat ignores.  */
+void
+diskfs_null_dirstat (struct dirstat *ds)
+{
+  ds->type = LOOKUP;
+}
+
+/* Forward declaration.  */
+static error_t
+dirscanblock (vm_address_t blockoff, struct node *dp, int idx,
+              const char *name, int namelen, enum lookup_type type,
+              struct dirstat *ds, ino_t *inum);
+
+/* Return nonzero iff DIRNAME, an 11-byte space-padded FAT directory
+   name (8 base + 3 extension characters), matches the UNAMELEN bytes
+   of USERNAME, ignoring case.  */
+static int
+fatnamematch (const char *dirname, const char *username, size_t unamelen)
+{
+  char dn[11];		/* Private copy, as dn[0] may be rewritten below.  */
+  int dpos = 0, upos = 0, ext = 0;
+
+  memcpy (dn, dirname, sizeof dn);
+  /* Deleted and end-of-directory entries never match.  */
+  if (dn[0] == FAT_DIR_NAME_DELETED || dn[0] == FAT_DIR_NAME_LAST)
+    return 0;
+  if (dn[0] == FAT_DIR_NAME_REPLACE_DELETED)
+    dn[0] = FAT_DIR_NAME_DELETED;
+
+  /* Special representations for `.' and `..'.  */
+  if (!memcmp(dn, FAT_DIR_NAME_DOT, 11))
+    return unamelen == 1 && username[0] == '.';
+
+  if (!memcmp (dn, FAT_DIR_NAME_DOTDOT, 11))
+    return unamelen == 2 && username[0] == '.' && username[1] == '.';
+
+  if (unamelen > 12)
+    return 0;
+
+  do
+    {
+      /* First check if we have reached the extension without coming
+	 across blanks. */
+      if (dpos == 8 && !ext)
+	{
+	  if (username[upos] == '.')
+	    {
+	      upos++;
+	      ext = 1;
+	    }
+	  else
+	    break;
+	}
+      /* Second, skip blanks in base part.  */
+      if (dn[dpos] == ' ')
+	{
+	  if (ext)
+	    break;
+	  while (dpos < 8 && dn[++dpos] == ' ');
+	  if (username[upos] == '.')
+	    upos++;
+	  ext = 1;
+	}
+      else if (tolower ((unsigned char) dn[dpos])
+	       != tolower ((unsigned char) username[upos]))
+	break;
+      else
+	{
+	  dpos++;
+	  upos++;
+	}
+    } while (upos < unamelen && dpos < 11);
+  while (dpos < 11 && dn[dpos] == ' ')
+    dpos++;
+  return (upos == unamelen && dpos == 11);
+}
+
+/* Implement the diskfs_lookup_hard callback from the diskfs library.
+   See <hurd/diskfs.h> for the interface specification.  */
+error_t
+diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type,
+		    struct node **npp, struct dirstat *ds, struct protid *cred)
+{
+  error_t err;
+  ino_t inum;
+  int namelen, spec_dotdot;
+  struct node *np = 0;
+  int retry_dotdot = 0;
+  vm_prot_t prot =
+    (type == LOOKUP) ? VM_PROT_READ : (VM_PROT_READ | VM_PROT_WRITE);
+  memory_object_t memobj;
+  vm_address_t buf = 0;
+  vm_size_t buflen = 0;
+  vm_address_t blockaddr;
+  int idx, lastidx, looped;
+
+  if ((type == REMOVE) || (type == RENAME))
+    assert (npp);
+
+  if (npp)
+    *npp = 0;
+
+  spec_dotdot = type & SPEC_DOTDOT;
+  type &= ~SPEC_DOTDOT;
+
+  namelen = strlen (name);
+
+  if (namelen > FAT_NAME_MAX)
+    return ENAMETOOLONG;
+
+ try_again:
+  if (ds)
+    {
+      ds->type = LOOKUP;
+      ds->mapbuf = 0;
+      ds->mapextent = 0;
+    }
+  if (buf)
+    {
+      munmap ((caddr_t) buf, buflen);
+      buf = 0;
+    }
+  if (ds && (type == CREATE || type == RENAME))
+    ds->stat = LOOKING;
+
+  /* Map in the directory contents. */
+  memobj = diskfs_get_filemap (dp, prot);
+
+  if (memobj == MACH_PORT_NULL)
+    return errno;
+
+  /* We allow extra space in case we have to do an EXTEND.  */
+  buflen = round_page (dp->dn_stat.st_size + DIRBLKSIZ);
+  err = vm_map (mach_task_self (),
+                &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+  mach_port_deallocate (mach_task_self (), memobj);
+  /* Without the mapping there is nothing we could scan.  */
+  if (err)
+    return err;
+
+  inum = 0;
+
+  if (!diskfs_check_readonly ())
+    dp->dn_set_atime = 1;
+
+  /* Start the lookup at DP->dn->dir_idx.  */
+  idx = dp->dn->dir_idx;
+  if (idx << LOG2_DIRBLKSIZ > dp->dn_stat.st_size)
+    idx = 0;                    /* just in case */
+  blockaddr = buf + (idx << LOG2_DIRBLKSIZ);
+  looped = (idx == 0);
+  lastidx = idx;
+  if (lastidx == 0)
+    lastidx = dp->dn_stat.st_size >> LOG2_DIRBLKSIZ;
+
+  while (!looped || idx < lastidx)
+    {
+      err = dirscanblock (blockaddr, dp, idx, name, namelen, type, ds, &inum);
+      if (!err)
+        {
+          dp->dn->dir_idx = idx;
+          break;
+        }
+      if (err != ENOENT)
+        {
+          munmap ((caddr_t) buf, buflen);
+          return err;
+        }
+
+      blockaddr += DIRBLKSIZ;
+      idx++;
+      if (blockaddr - buf >= dp->dn_stat.st_size && !looped)
+        {
+          /* We've gotten to the end; start back at the beginning.  */
+          looped = 1;
+          blockaddr = buf;
+          idx = 0;
+        }
+    }
+
+  if (!diskfs_check_readonly ())
+    dp->dn_set_atime = 1;
+  if (diskfs_synchronous)
+    diskfs_node_update (dp, 1);
+
+  /* If err is set here, it's ENOENT, and we don't want to
+     think about that as an error yet.  */
+  err = 0;
+
+  if (inum && npp)
+    {
+      if (namelen != 2 || name[0] != '.' || name[1] != '.')
+        {
+          if (inum == dp->cache_id)
+            {
+              np = dp;
+              diskfs_nref (np);
+            }
+          else
+            {
+              err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf);
+              if (err)
+                goto out;
+            }
+        }
+
+      /* We are looking up "..".  */
+      /* Check to see if this is the root of the filesystem.  */
+      else if (dp == diskfs_root_node)
+        {
+          err = EAGAIN;
+          goto out;
+        }
+
+      /* We can't just do diskfs_cached_lookup, because we would then
+         deadlock.  So we do this.  Ick.  */
+      else if (retry_dotdot)
+        {
+          /* Check to see that we got the same answer as last time.  */
+          if (inum != retry_dotdot)
+            {
+              /* Drop what we *thought* was .. (but isn't any more) and
+                 try *again*.  */
+              diskfs_nput (np);
+              mutex_unlock (&dp->lock);
+              err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf);
+              mutex_lock (&dp->lock);
+              if (err)
+                goto out;
+              retry_dotdot = inum;
+              goto try_again;
+            }
+          /* Otherwise, we got it fine and np is already set properly.  */
+        }
+      else if (!spec_dotdot)
+        {
+          /* Lock them in the proper order, and then
+             repeat the directory scan to see if this is still
+             right.  */
+          mutex_unlock (&dp->lock);
+          err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf);
+          mutex_lock (&dp->lock);
+          if (err)
+            goto out;
+          retry_dotdot = inum;
+          goto try_again;
+        }
+
+      /* Here below are the spec dotdot cases.  */
+      else if (type == RENAME || type == REMOVE)
+        np = ifind (inum);
+
+      else if (type == LOOKUP)
+        {
+          diskfs_nput (dp);
+          err = diskfs_cached_lookup_in_dirbuf (inum, &np, buf);
+          if (err)
+            goto out;
+        }
+      else
+        assert (0);
+    }
+
+  if ((type == CREATE || type == RENAME) && !inum && ds && ds->stat == LOOKING)
+    {
+      /* We didn't find any room, so mark ds to extend the dir.  */
+      ds->type = CREATE;
+      ds->stat = EXTEND;
+      ds->idx = dp->dn_stat.st_size >> LOG2_DIRBLKSIZ;
+    }
+
+  /* Return to the user; if we can't, release the reference
+     (and lock) we acquired above.  */
+ out:
+  /* Deallocate or save the mapping.  */
+  if ((err && err != ENOENT)
+      || !ds
+      || ds->type == LOOKUP)
+    {
+      munmap ((caddr_t) buf, buflen);
+      if (ds)
+        ds->type = LOOKUP;      /* Set to be ignored by drop_dirstat.  */
+    }
+  else
+    {
+      ds->mapbuf = buf;
+      ds->mapextent = buflen;
+    }
+
+  if (np)
+    {
+      assert (npp);
+      if (err)
+        {
+          if (!spec_dotdot)
+            {
+              /* Normal case.  */
+              if (np == dp)
+                diskfs_nrele (np);
+              else
+                diskfs_nput (np);
+            }
+          else if (type == RENAME || type == REMOVE)
+            /* We just did ifind to get np; that allocates
+               no new references, so we don't have anything to do.  */
+            ;
+          else if (type == LOOKUP)
+            /* We did diskfs_cached_lookup.  */
+            diskfs_nput (np);
+        }
+      else
+        *npp = np;
+    }
+
+  return err ? : inum ? 0 : ENOENT;
+}
+
+/* Scan block at address BLOCKADDR (of node DP; block index IDX), for
+   name NAME of length NAMELEN.  Args TYPE, DS are as for
+   diskfs_lookup; usable free slots are recorded in DS.  If found,
+   set *INUM to the inode number, else return ENOENT.  */
+static error_t
+dirscanblock (vm_address_t blockaddr, struct node *dp, int idx,
+              const char *name, int namelen, enum lookup_type type,
+              struct dirstat *ds, ino_t *inum)
+{
+  int nfree = 0, needed = 0;
+  vm_address_t currentoff, prevoff = 0;
+  struct dirrect *entry = 0;
+  size_t nbytes = 0;
+  int looking = 0;
+  int countcopies = 0;
+  int consider_compress = 0;
+  inode_t inode;
+  vi_key_t entry_key = vi_zero_key;
+
+  /* FAT lacks the "." and ".." directory record in the root directory,
+     so we emulate them here.  */
+  if (idx == 0 && dp == diskfs_root_node
+      && (fatnamematch (FAT_DIR_NAME_DOT, name, namelen)
+	  || fatnamematch (FAT_DIR_NAME_DOTDOT, name, namelen)))
+    {
+      entry_key.dir_inode = diskfs_root_node->cache_id;
+      currentoff = blockaddr;
+    }
+  else
+    {
+      if (ds && (ds->stat == LOOKING
+		 || ds->stat == COMPRESS))
+	{
+	  looking = 1;
+	  countcopies = 1;
+	  needed = FAT_DIR_RECORDS (namelen);
+	}
+
+      for (currentoff = blockaddr, prevoff = 0;
+	   currentoff < blockaddr + DIRBLKSIZ;
+	   prevoff = currentoff, currentoff += FAT_DIR_REC_LEN)
+	{
+	  entry = (struct dirrect *)currentoff;
+
+	  if (looking || countcopies)
+	    {
+	      int thisfree;
+
+	      /* Count how much free space this entry has in it.  */
+	      if ((char) entry->name[0] == FAT_DIR_NAME_LAST ||
+		  (char) entry->name[0] == FAT_DIR_NAME_DELETED)
+		thisfree = FAT_DIR_REC_LEN;
+	      else
+		thisfree = 0;
+
+	      /* If this isn't at the front of the block, then it will
+		 have to be copied if we do a compression; count the
+		 number of bytes there too.  */
+	      if (countcopies && currentoff != blockaddr)
+		nbytes += FAT_DIR_REC_LEN;
+
+	      if (ds->stat == COMPRESS && nbytes > ds->nbytes)
+		/* The previously found compress is better than this
+		   one, so don't bother counting any more.  */
+		countcopies = 0;
+
+	      if (thisfree >= needed)
+		{
+		  ds->type = CREATE;
+		  ds->stat = TAKE;
+		  ds->entry = entry;
+		  ds->idx = idx;
+		  looking = countcopies = 0;
+		}
+	      else
+		{
+		  nfree += thisfree;
+		  if (nfree >= needed)
+		    consider_compress = 1;
+		}
+	    }
+
+	  if (entry->attribute & FAT_DIR_ATTR_LABEL)
+	    /* Either the volume label in root dir or a long filename
+	       component.  */
+	    continue;
+
+	  if (fatnamematch (entry->name, name, namelen))
+	    break;
+	}
+
+      /* The slot status lives in DS->stat; DS->type is a lookup_type.  */
+      if (consider_compress
+	  && (ds->stat == LOOKING
+	      || (ds->stat == COMPRESS && ds->nbytes > nbytes)))
+	{
+	  ds->type = CREATE;
+	  ds->stat = COMPRESS;
+	  ds->entry = (struct dirrect *) blockaddr;
+	  ds->idx = idx;
+	  ds->nbytes = nbytes;
+	}
+    }
+
+  if (currentoff >= blockaddr + DIRBLKSIZ)
+    {
+      /* The name is not in this block.  */
+
+      return ENOENT;
+    }
+
+  /* We have found the required name.  */
+
+  if (ds && type == CREATE)
+    ds->type = LOOKUP;          /* It's invalid now.  */
+  else if (ds && (type == REMOVE || type == RENAME))
+    {
+      ds->type = type;
+      ds->stat = HERE_TIS;
+      ds->entry = entry;
+      ds->idx = idx;
+      ds->preventry = (struct dirrect *) prevoff;
+    }
+
+  if (entry_key.dir_inode)
+    {
+      /* The required name is "." or ".." in the root dir.  */
+      *inum = entry_key.dir_inode;
+    }
+  else if ((entry->attribute & FAT_DIR_ATTR_DIR)
+	   && !memcmp (entry->name, FAT_DIR_NAME_DOT, 11))
+    {
+      /* "." and ".." have to be treated special. We don't want their
+	 directory records, but the records of the directories they
+	 point to.  */
+
+      *inum = dp->cache_id;
+    }
+  else if ((entry->attribute & FAT_DIR_ATTR_DIR)
+	   && !memcmp (entry->name, FAT_DIR_NAME_DOTDOT, 11))
+    {
+      if (entry->first_cluster_low[0] == 0
+	  && entry->first_cluster_low[1] == 0
+	  && entry->first_cluster_high[0] == 0
+	  && entry->first_cluster_high[1] == 0)
+	{
+	  *inum = diskfs_root_node->cache_id;
+	}
+      else
+	{
+	  struct vi_key vk = vi_key (dp->dn->inode);
+	  *inum = vk.dir_inode;
+	}
+    }
+  else
+    {
+      entry_key.dir_inode = dp->cache_id;
+      entry_key.dir_offset = (currentoff - blockaddr) + (idx << LOG2_DIRBLKSIZ);
+      return vi_rlookup(entry_key, inum, &inode, 1);
+    }
+  return 0;
+}
+
+/* Following a lookup call for CREATE, this adds a node to a
+   directory.  DP is the directory to be modified; NAME is the name to
+   be entered; NP is the node being linked in; DS is the cached
+   information returned by lookup; CRED describes the user making the
+   call.  This call may only be made if the directory has been held
+   locked continuously since the preceding lookup call, and only if
+   that call returned ENOENT.  */
+error_t
+diskfs_direnter_hard (struct node *dp, const char *name, struct node *np,
+                      struct dirstat *ds, struct protid *cred)
+{
+  struct dirrect *new;
+  int namelen = strlen (name);
+  int needed = FAT_DIR_RECORDS (namelen);
+  error_t err;
+  loff_t oldsize = 0;
+
+  assert (ds->type == CREATE);
+
+  assert (!diskfs_readonly);
+
+  dp->dn_set_mtime = 1;
+
+  /* Select a location for the new directory entry.  Each branch of
+     this switch is responsible for setting NEW to point to the
+     on-disk directory entry being written.  */
+
+  switch (ds->stat)
+    {
+    case TAKE:
+      /* We are supposed to consume this slot.  */
+      assert ((char)ds->entry->name[0] == FAT_DIR_NAME_LAST
+	      || (char)ds->entry->name[0] == FAT_DIR_NAME_DELETED);
+
+      new = ds->entry;
+      break;
+
+    case EXTEND:
+      /* Extend the file.  */
+      assert (needed <= bytes_per_cluster);
+
+      oldsize = dp->dn_stat.st_size;
+      while (oldsize + bytes_per_cluster > dp->allocsize)
+        {
+          err = diskfs_grow (dp, oldsize + bytes_per_cluster, cred);
+          if (err)
+            {
+              munmap ((caddr_t) ds->mapbuf, ds->mapextent);
+              return err;
+            }
+        }
+
+      new = (struct dirrect *) ((char *) ds->mapbuf + oldsize);
+
+      dp->dn_stat.st_size = oldsize + bytes_per_cluster;
+      dp->dn_set_ctime = 1;
+
+      break;
+
+    case SHRINK:
+    case COMPRESS:
+    default:
+      assert(0);
+
+      /* COMPRESS will be used later, with long filenames, but shrink
+	 does not make sense on fat, as all entries have fixed
+	 size.  */
+    }
+
+  /* NEW points to the directory entry being written.  Now fill in the
+     data.  */
+
+  memcpy (new->name, "           ", 11);
+  memcpy (new->name, name, namelen < 11 ? namelen : 11); /* XXX truncates */
+
+  /* XXX We need to do much, much more here.  */
+  /* XXX What about creating . and .. for dirs?  */
+
+  /* Mark the directory inode as having been written.  */
+  dp->dn_set_mtime = 1;
+
+  munmap ((caddr_t) ds->mapbuf, ds->mapextent);
+
+  diskfs_file_update (dp, 1);
+
+  return 0;
+}
+
+/* Following a lookup call for REMOVE, this removes the link from the
+   directory.  DP is the directory being changed and DS is the cached
+   information returned from lookup.  This call is only valid if the
+   directory has been locked continuously since the call to lookup, and
+   only if that call succeeded.  */
+error_t
+diskfs_dirremove_hard (struct node *dp, struct dirstat *ds)
+{
+  assert (ds->type == REMOVE);
+  assert (ds->stat == HERE_TIS);
+
+  assert (!diskfs_readonly);
+
+  dp->dn_set_mtime = 1;
+
+  ds->entry->name[0] = FAT_DIR_NAME_DELETED;
+
+  /* XXX Do something with dirrect? inode?  */
+
+  dp->dn_set_mtime = 1;
+
+  munmap ((caddr_t) ds->mapbuf, ds->mapextent);
+
+  diskfs_file_update (dp, 1);
+
+  return 0;
+}
+
+/* Following a lookup call for RENAME, this changes the inode number
+   on a directory entry.  DP is the directory being changed; NP is the
+   new node being linked in; DS is the cached information returned by
+   lookup.  This call is only valid if the directory has been locked
+   continuously since the call to lookup, and only if that call
+   succeeded.  */
+error_t
+diskfs_dirrewrite_hard (struct node *dp, struct node *np, struct dirstat *ds)
+{
+  assert (ds->type == RENAME);
+  assert (ds->stat == HERE_TIS);
+
+  assert (!diskfs_readonly);
+
+  /* XXX We have to reimplement rename completely.  */
+  /*
+    ds->entry->inode = np->cache_id;
+  */
+  dp->dn_set_mtime = 1;
+ 
+  munmap ((caddr_t) ds->mapbuf, ds->mapextent);
+
+  diskfs_file_update (dp, 1);
+
+  return 0;
+}
+
+/* Return nonzero if DP is empty, i.e. holds only "." and ".." entries.
+   This routine must be called from inside a catch_exception ().  */
+int
+diskfs_dirempty (struct node *dp, struct protid *cred)
+{
+  error_t err;
+  vm_address_t buf = 0, curoff;
+  struct dirrect *entry;
+  int hit = 0;                  /* Found something in the directory.  */
+  memory_object_t memobj = diskfs_get_filemap (dp, VM_PROT_READ);
+
+  if (memobj == MACH_PORT_NULL)
+    /* XXX should reflect error properly.  */
+    return 0;
+
+  err = vm_map (mach_task_self (), &buf, dp->dn_stat.st_size, 0,
+                1, memobj, 0, 0, VM_PROT_READ, VM_PROT_READ, 0);
+  mach_port_deallocate (mach_task_self (), memobj);
+  assert (!err);
+
+  if (! diskfs_check_readonly ())
+    dp->dn_set_atime = 1;
+
+  for (curoff = buf;
+       !hit && curoff < buf + dp->dn_stat.st_size;
+       curoff += FAT_DIR_REC_LEN)
+    {
+      entry = (struct dirrect *) curoff;
+
+      if (entry->name[0] == FAT_DIR_NAME_LAST)
+	break;
+      if ((char) entry->name[0] != FAT_DIR_NAME_DELETED
+	  && memcmp (entry->name, FAT_DIR_NAME_DOT, 11)
+	  && memcmp (entry->name, FAT_DIR_NAME_DOTDOT, 11))
+	hit = 1;
+    }
+
+  if (! diskfs_check_readonly ())
+    dp->dn_set_atime = 1;
+  if (diskfs_synchronous)
+    diskfs_node_update (dp, 1);
+
+  munmap ((caddr_t) buf, dp->dn_stat.st_size);
+
+  return !hit;
+}
+
+/* Make DS an invalid dirstat, releasing the mapping it may still hold.  */
+error_t
+diskfs_drop_dirstat (struct node *dp, struct dirstat *ds)
+{
+  if (ds->type != LOOKUP)
+    {
+      assert (ds->mapbuf);
+      munmap ((caddr_t) ds->mapbuf, ds->mapextent);
+      ds->type = LOOKUP;
+    }
+  return 0;
+}
+
+
+/* Implement the diskfs_get_directs callback as described in
+   <hurd/diskfs.h>.  */
+error_t
+diskfs_get_directs (struct node *dp,
+		    int entry,
+		    int nentries,
+		    char **data,
+		    u_int *datacnt,
+		    vm_size_t bufsiz,
+		    int *amt)
+{
+  volatile vm_size_t allocsize;
+  struct dirrect *ep;
+  struct dirent *userp;
+  int i;
+  char *datap;
+  volatile int ouralloc = 0;
+  error_t err;
+  vm_prot_t prot = VM_PROT_READ;
+  memory_object_t memobj;
+  vm_address_t buf = 0, bufp;
+  vm_size_t buflen = 0;
+
+  /* Allocate some space to hold the returned data.  */
+  allocsize = bufsiz ? round_page (bufsiz) : vm_page_size * 4;
+  if (allocsize > *datacnt)
+    {
+      *data = mmap (0, allocsize, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+      ouralloc = 1;
+    }
+
+  /* Map in the directory contents.  */
+  memobj = diskfs_get_filemap (dp, prot);
+
+  if (memobj == MACH_PORT_NULL)
+    return errno;
+
+  /* Map whole pages; nothing is written, so no extra room is needed.  */
+  buflen = round_page (dp->dn_stat.st_size);
+  err = vm_map (mach_task_self (),
+                &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+  mach_port_deallocate (mach_task_self (), memobj);
+
+  bufp = buf;
+  for (i = 0; i < entry; i ++)
+    {
+      /* The root directory in FAT file systems doesn't contain
+	 entries for DOT and DOTDOT, they are special cased below.  */
+      if (dp == diskfs_root_node && i < 2)
+	continue;
+
+      ep = (struct dirrect *) bufp;
+
+      if (bufp >= buf + buflen || (char)ep->name[0] == FAT_DIR_NAME_LAST)
+	{
+	  /* Not that many entries in the directory; return nothing.
+	     Release the buffer itself (*DATA), not the caller's pointer.  */
+	  if (ouralloc)
+	    munmap (*data, allocsize);
+	  munmap ((caddr_t) buf, buflen);
+	  *datacnt = 0;
+	  *amt = 0;
+	  return 0;
+	}
+
+      /* Ignore and skip deleted and label entries (catches also long
+	 filenames).  */
+      if ((char)ep->name[0] == FAT_DIR_NAME_DELETED
+	  || (ep->attribute & FAT_DIR_ATTR_LABEL))
+	  i--;
+      bufp = bufp + FAT_DIR_REC_LEN;
+    }
+
+  /* Now copy entries one at a time.  */
+  i = 0;
+  datap = *data;
+  while (((nentries == -1) || (i < nentries))
+	 && (!bufsiz || datap - *data < bufsiz)
+	 && bufp < buf + buflen)
+    {
+      char name[13];
+      size_t namlen, reclen;
+      struct dirrect dot = { FAT_DIR_NAME_DOT, FAT_DIR_ATTR_DIR };
+      struct dirrect dotdot = { FAT_DIR_NAME_DOTDOT, FAT_DIR_ATTR_DIR };
+
+      /* The root directory in FAT file systems doesn't contain
+	 entries for DOT and DOTDOT, so synthesize them here.  */
+      if (dp == diskfs_root_node && i < 2)
+	{
+	  if (i == 0)
+	    ep = &dot;
+	  else
+	    ep = &dotdot;
+	}
+      else
+	ep = (struct dirrect *) bufp;
+
+      if ((char)ep->name[0] == FAT_DIR_NAME_LAST)
+	{
+	  /* Last entry.  */
+	  bufp = buf + buflen;
+	  continue;
+	}
+
+      if ((char)ep->name[0] == FAT_DIR_NAME_DELETED || (ep->attribute & FAT_DIR_ATTR_LABEL))
+	{
+	  bufp = bufp + FAT_DIR_REC_LEN;
+	  continue;
+	}
+
+      /* See if there's room to hold this one.  */
+
+      fat_to_unix_filename(ep->name, name);
+      namlen = strlen(name);
+
+      /* Perhaps downcase it?  */
+
+      reclen = sizeof (struct dirent) + namlen;
+      reclen = (reclen + 3) & ~3;
+
+      /* Expand buffer if necessary.  */
+      if (datap - *data + reclen > allocsize)
+	{
+	  vm_address_t newdata;
+
+	  vm_allocate (mach_task_self (), &newdata,
+		       (ouralloc
+			? (allocsize *= 2)
+			: (allocsize = vm_page_size * 2)), 1);
+	  memcpy ((void *) newdata, (void *) *data, datap - *data);
+
+	  if (ouralloc)
+	    munmap (*data, allocsize / 2);
+
+	  datap = (char *) newdata + (datap - *data);
+	  *data = (char *) newdata;
+	  ouralloc = 1;
+	}
+
+      userp = (struct dirent *) datap;
+
+      /* Fill in entry.  */
+      {
+        ino_t inode;
+	inode_t v_inode;
+	vi_key_t entry_key;
+
+	entry_key.dir_inode = dp->cache_id;
+	entry_key.dir_offset = bufp - buf;
+
+	vi_rlookup (entry_key, &inode, &v_inode, 1);
+	userp->d_fileno = inode;
+      }
+      userp->d_type = DT_UNKNOWN;
+      userp->d_reclen = reclen;
+      userp->d_namlen = namlen;
+      memcpy (userp->d_name, name, namlen);
+      userp->d_name[namlen] = '\0';
+
+      /* And move along.  */
+      datap = datap + reclen;
+      if (!(dp == diskfs_root_node && i < 2))
+	bufp = bufp + FAT_DIR_REC_LEN;
+      i++;
+    }
+
+  /* If we didn't use all the pages of a buffer we allocated, free
+     the excess.  */
+  if (ouralloc
+      && round_page (datap - *data) < round_page (allocsize))
+    munmap ((caddr_t) round_page (datap),
+	    round_page (allocsize) - round_page (datap - *data));
+
+  munmap ((caddr_t) buf, buflen);
+
+  /* Return.  */
+  *amt = i;
+  *datacnt = datap - *data;
+  return 0;
+}
diff --git a/fatfs/fat.c b/fatfs/fat.c
new file mode 100644
index 00000000..4d3ba3da
--- /dev/null
+++ b/fatfs/fat.c
@@ -0,0 +1,744 @@
+/* fat.c - Support for FAT filesystems.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Written by Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <string.h>
+#include <error.h>
+#include <limits.h>
+#include <errno.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+
+#include <hurd/store.h>
+#include <hurd/diskfs.h>
+
+#include "fatfs.h"
+
+/* Unprocessed superblock.  */
+struct boot_sector *sblock;
+
+/* Processed sblock info.  */
+fat_t fat_type;                        /* Chosen from nr_of_clusters.  */
+size_t bytes_per_sector;               /* 512, 1024, 2048 or 4096.  */
+size_t log2_bytes_per_sector;          /* 9 to 12.  */
+size_t sectors_per_cluster;            /* Power of two, 1 to 128.  */
+size_t bytes_per_cluster;              /* 512 to 32768.  */
+unsigned int log2_bytes_per_cluster;   /* 9 to 15.  */
+size_t sectors_per_fat;                /* Size of a single FAT.  */
+size_t total_sectors;                  /* Sectors in the filesystem.  */
+size_t nr_of_root_dir_sectors;
+size_t first_root_dir_byte;            /* Follows reserved area and FATs.  */
+size_t first_data_sector;              /* Follows the root dir sectors.  */
+vm_offset_t first_data_byte;           /* first_data_sector in bytes.  */
+size_t first_fat_sector;               /* Start of the FAT that is used.  */
+cluster_t nr_of_clusters;              /* Clusters in the data region.  */
+
+/* Hold this lock while converting times using gmtime.  */
+spin_lock_t epoch_to_time_lock = SPIN_LOCK_INITIALIZER;
+
+/* Hold this lock while allocating a new cluster in the FAT.  */
+spin_lock_t allocate_free_cluster_lock = SPIN_LOCK_INITIALIZER;
+
+/* Where to look for the next free cluster. This is meant to avoid
+   searching through a nearly full file system from the beginning at
+   every request.  It would be better to use the field of the same
+   name in the fs_info block. 2 is the first data cluster in any
+   FAT.  */
+cluster_t next_free_cluster = 2;
+
+
+/* Read the boot sector ("superblock") from STORE into SBLOCK and derive
+   the global layout variables above from it.  Exits through error () if
+   the sector cannot be read or does not describe a usable filesystem.  */
+void
+fat_read_sblock (void)
+{
+  error_t err;
+  int read;
+
+  sblock = malloc (sizeof (struct boot_sector));
+  if (!sblock)
+    error (1, ENOMEM, "Could not allocate superblock");
+  err = store_read (store, 0, sizeof (struct boot_sector), (void **) &sblock, &read);
+  if (err || read < (int) sizeof (struct boot_sector))
+    error (1, err, "Could not read superblock");
+
+  if (read_word(sblock->id) != BOOT_SECTOR_ID)
+    error (1, 0, "Could not find valid superblock");
+
+  /* Parse some important bits of the superblock.  */
+  bytes_per_sector = read_word (sblock->bytes_per_sector);
+  switch (bytes_per_sector)
+    {
+    case 512:
+      log2_bytes_per_sector = 9;
+      break;
+    case 1024:
+      log2_bytes_per_sector = 10;
+      break;
+    case 2048:
+      log2_bytes_per_sector = 11;
+      break;
+    case 4096:
+      log2_bytes_per_sector = 12;
+      break;
+    default:
+      error (1, 0, "Invalid number of bytes per sector");
+    };
+
+  sectors_per_cluster = sblock->sectors_per_cluster;
+  if (sectors_per_cluster != 1 && sectors_per_cluster != 2
+      && sectors_per_cluster != 4 && sectors_per_cluster != 8
+      && sectors_per_cluster != 16 && sectors_per_cluster != 32
+      && sectors_per_cluster != 64 && sectors_per_cluster != 128)
+    error (1, 0, "Invalid number of sectors per cluster");
+
+  bytes_per_cluster = sectors_per_cluster << log2_bytes_per_sector;
+  switch (bytes_per_cluster)
+    {
+    case 512:
+      log2_bytes_per_cluster = 9;
+      break;
+    case 1024:
+      log2_bytes_per_cluster = 10;
+      break;
+    case 2048:
+      log2_bytes_per_cluster = 11;
+      break;
+    case 4096:
+      log2_bytes_per_cluster = 12;
+      break;
+    case 8192:
+      log2_bytes_per_cluster = 13;
+      break;
+    case 16384:
+      log2_bytes_per_cluster = 14;
+      break;
+    case 32768:
+      log2_bytes_per_cluster = 15;
+      break;
+    default:
+      error (1, 0, "Invalid number of bytes per cluster");
+    };
+
+  /* The 32-bit fields only count when their 16-bit twin is zero.  */
+  total_sectors = read_word (sblock->total_sectors_16)
+    ?: read_dword (sblock->total_sectors_32);
+  /* Compare in sectors; the size in bytes may not fit into a size_t.  */
+  if (total_sectors > (store->size >> log2_bytes_per_sector))
+    error (1, 0, "Store is smaller then implied by metadata");
+  if (total_sectors == 0)
+    error (1, 0, "Number of total sectors is zero");
+
+  if (bytes_per_sector & (store->block_size - 1))
+    error (1, 0, "Block size of filesystem is not a multiple of the block size "
+	   "of the store");
+
+  if (read_word (sblock->reserved_sectors) == 0)
+    error (1, 0, "Number of reserved sectors is zero");
+  if (sblock->nr_of_fat_tables == 0)
+    error (1, 0, "Number of FATs is zero");
+
+  sectors_per_fat = read_word (sblock->sectors_per_fat_16)
+    ?: read_dword (sblock->compat.fat32.sectors_per_fat_32);
+  if (sectors_per_fat == 0)
+    error (1, 0, "Number of sectors per fat is zero");
+
+  /* Round up.  This has to yield zero if there are no root dirents.  */
+  nr_of_root_dir_sectors = (read_word (sblock->nr_of_root_dirents) * FAT_DIR_REC_LEN
+			    + bytes_per_sector - 1) / bytes_per_sector;
+  if (nr_of_root_dir_sectors & (sectors_per_cluster - 1))
+    error (1, 0, "Number of root dir sectors is not a multiple of sectors_per_cluster");
+
+  first_root_dir_byte = (read_word (sblock->reserved_sectors)
+    + (sblock->nr_of_fat_tables * sectors_per_fat)) << log2_bytes_per_sector;
+  first_data_sector = (first_root_dir_byte >> log2_bytes_per_sector) + nr_of_root_dir_sectors;
+  first_data_byte = first_data_sector << log2_bytes_per_sector;
+
+  if (first_data_sector >= total_sectors)
+    error (1, 0, "Metadata leaves no room for a data region");
+  nr_of_clusters = (total_sectors - first_data_sector) / sectors_per_cluster;
+
+  if (nr_of_clusters <= FAT12_MAX_NR_OF_CLUSTERS)
+    fat_type = FAT12;
+  else
+    {
+      if (nr_of_clusters <= FAT16_MAX_NR_OF_CLUSTERS)
+	fat_type = FAT16;
+      else
+	fat_type = FAT32;
+    }
+
+  if (fat_type == FAT32 && read_word (sblock->compat.fat32.fs_version) != 0)
+    error (1, 0, "Incompatible file system version");
+
+  first_fat_sector = 0;
+  if (fat_type == FAT32 && read_word (sblock->compat.fat32.extension_flags) & 1<<7)
+    {
+      first_fat_sector = (read_word (sblock->compat.fat32.extension_flags) & 0x0f);
+      if (first_fat_sector >= sblock->nr_of_fat_tables)   /* Zero based.  */
+	error (1, 0, "Active FAT table does not exist");
+      first_fat_sector *= sectors_per_fat;
+    }
+  first_fat_sector += read_word (sblock->reserved_sectors);
+}
+
+
+/* Write NEXT_CLUSTER in the FAT at position CLUSTER; the generic
+   FAT_BAD_CLUSTER/FAT_EOC are stored as the markers of the FAT type.
+   Call from inside diskfs_catch_exception.  Returns 0 (always).  */
+error_t
+fat_write_next_cluster(cluster_t cluster, cluster_t next_cluster)
+{
+  loff_t fat_entry_offset;
+  cluster_t data;
+
+  /* First data cluster is cluster 2.  */
+  assert (cluster >= 2 && cluster < nr_of_clusters + 2);
+
+  switch (fat_type)
+    {
+    case FAT12:
+      if (next_cluster == FAT_BAD_CLUSTER)
+	next_cluster = FAT12_BAD_CLUSTER;
+      else if (next_cluster == FAT_EOC)
+	next_cluster = FAT12_EOC;
+
+      /* 12-bit entry: odd clusters own the high bits, even the low.  */
+      fat_entry_offset = (cluster * 3) / 2;
+      data = read_word (fat_image + fat_entry_offset);
+      if (cluster & 1)
+	data = (data & 0xf) | ((next_cluster & 0xfff) << 4);
+      else
+	data = (data & 0xf000) | (next_cluster & 0xfff);
+      write_word (fat_image + fat_entry_offset, data);
+      break;
+
+    case FAT16:
+      if (next_cluster == FAT_BAD_CLUSTER)
+	next_cluster = FAT16_BAD_CLUSTER;
+      else if (next_cluster == FAT_EOC)
+	next_cluster = FAT16_EOC;
+      fat_entry_offset = cluster * 2;
+      write_word (fat_image + fat_entry_offset, next_cluster);
+      break;
+
+    case FAT32:
+    default:                             /* To silence gcc warning.  */
+      if (next_cluster == FAT_BAD_CLUSTER)
+	next_cluster = FAT32_BAD_CLUSTER;
+      else if (next_cluster == FAT_EOC)
+	next_cluster = FAT32_EOC;
+      /* Only the low 28 bits are the entry; keep the reserved top four.  */
+      fat_entry_offset = cluster * 4;
+      data = read_dword (fat_image + fat_entry_offset) & 0xf0000000;
+      write_dword (fat_image + fat_entry_offset, data | (next_cluster & 0x0fffffff));
+    }
+
+  return 0;
+}
+
+/* Look up the FAT entry of CLUSTER and store it in NEXT_CLUSTER, with
+   the bad-cluster and end-of-chain markers of the active FAT type
+   mapped to the generic FAT_BAD_CLUSTER and FAT_EOC.
+   You must call this from inside diskfs_catch_exception.
+   Returns 0 (always succeeds).  */
+error_t
+fat_get_next_cluster(cluster_t cluster, cluster_t *next_cluster)
+{
+  cluster_t entry;
+  cluster_t bad_mark;
+  cluster_t eoc_mark;
+
+  /* Clusters 0 and 1 do not exist; data clusters are numbered from 2.  */
+  assert (cluster >= 2 && cluster < nr_of_clusters + 2);
+
+  /* Fetch the raw entry and remember the markers of this FAT type.  */
+  if (fat_type == FAT12)
+    {
+      /* Two entries share three bytes: odd clusters live in the upper
+	 twelve bits of the word, even clusters in the lower twelve.  */
+      entry = read_word (fat_image + (loff_t) ((cluster * 3) / 2));
+      entry = (cluster & 1) ? entry >> 4 : entry & 0xfff;
+      bad_mark = FAT12_BAD_CLUSTER;
+      eoc_mark = FAT12_EOC;
+    }
+  else if (fat_type == FAT16)
+    {
+      entry = read_word (fat_image + (loff_t) (cluster * 2));
+      bad_mark = FAT16_BAD_CLUSTER;
+      eoc_mark = FAT16_EOC;
+    }
+  else
+    {
+      /* FAT32, where only the low 28 bits of an entry are significant.  */
+      entry = read_dword (fat_image + (loff_t) (cluster * 4));
+      entry &= 0x0fffffff;
+      bad_mark = FAT32_BAD_CLUSTER;
+      eoc_mark = FAT32_EOC;
+    }
+
+  /* Translate the type specific markers into the generic ones.  */
+  if (entry == bad_mark)
+    entry = FAT_BAD_CLUSTER;
+  else if (entry >= eoc_mark)
+    entry = FAT_EOC;
+
+  *next_cluster = entry;
+  return 0;
+}
+
+/* Find a free cluster, mark it as allocated by storing CONTENT in its
+   FAT entry, and return its number in CLUSTER.  Returns 0 on success
+   or ENOSPC if every cluster of the filesystem is in use.  The search
+   position is kept in next_free_cluster between calls.
+   You must call this from inside diskfs_catch_exception.  */
+error_t
+fat_allocate_cluster (cluster_t content, cluster_t *cluster)
+{
+  cluster_t start;
+  cluster_t candidate;
+  cluster_t entry;
+  int wrapped = 0;
+  error_t err = ENOSPC;
+
+  assert (content != FAT_FREE_CLUSTER);
+
+  spin_lock (&allocate_free_cluster_lock);
+  start = next_free_cluster;
+
+  /* Walk the FAT once around, beginning at the search hint and
+     continuing at cluster 2 after the last cluster, until a free
+     entry turns up or we are back where we started.  */
+  for (;;)
+    {
+      candidate = next_free_cluster;
+      fat_get_next_cluster (candidate, &entry);
+
+      /* Advance the hint past CANDIDATE in either case.  */
+      next_free_cluster++;
+      if (next_free_cluster == nr_of_clusters + 2)
+	{
+	  next_free_cluster = 2;
+	  wrapped = 1;
+	}
+
+      if (entry == FAT_FREE_CLUSTER)
+	{
+	  *cluster = candidate;
+	  fat_write_next_cluster (candidate, content);
+	  err = 0;
+	  break;
+	}
+
+      /* Every cluster has been looked at without success.  */
+      if (wrapped && next_free_cluster == start)
+	break;
+    }
+
+  spin_unlock (&allocate_free_cluster_lock);
+  return err;
+}
+
+/* Extend the cached cluster chain of NODE up to file cluster
+   NEW_LAST_CLUSTER (0 is the first cluster of the file) or up to the
+   end of the chain, whatever is less.  If CREATE is true, allocate
+   new clusters once the end of the chain is reached.  Returns 0 or
+   the error that stopped the extension (ENOSPC or ENOMEM).  */
+error_t
+fat_extend_chain (struct node *node, cluster_t new_last_cluster, int create)
+{
+  error_t err = 0;
+  struct disknode *dn = node->dn;
+  struct cluster_chain *table;
+  int offs;
+  cluster_t left, prev_cluster, cluster;
+
+  error_t allocate_new_table(struct cluster_chain **table)
+    {
+      struct cluster_chain *t;
+
+      t = *table;
+      *table = malloc (sizeof (struct cluster_chain));
+      if (!*table)
+	return ENOMEM;
+      (*table)->next = 0;
+      if (t)
+	dn->last = t->next = *table;
+      else
+	dn->last = dn->first = *table;
+      return 0;
+    }
+
+  spin_lock(&dn->chain_extension_lock);
+  /* If we already have what we need, or we have all clusters that are
+     available without allocating new ones, drop the lock and go out.  */
+  if (new_last_cluster < dn->length_of_chain
+      || (!create && dn->chain_complete))
+    {
+      spin_unlock(&dn->chain_extension_lock);
+      return 0;
+    }
+
+  left = new_last_cluster + 1 - dn->length_of_chain;
+  table = dn->last;
+  if (table)
+    {
+      offs = (dn->length_of_chain - 1) & (CLUSTERS_PER_TABLE - 1);
+      prev_cluster = table->cluster[offs];
+    }
+  else
+    {
+      offs = CLUSTERS_PER_TABLE - 1;
+      prev_cluster = FAT_FREE_CLUSTER;
+    }
+
+   while (left)
+     {
+       if (dn->chain_complete)
+	 {
+	   err = fat_allocate_cluster(FAT_EOC, &cluster);
+	   if (err)
+	     break;
+	   if (prev_cluster)
+	     fat_write_next_cluster(prev_cluster, cluster);
+	   else
+	     /* XXX: Also write this to dirent structure!  */
+	     dn->start_cluster = cluster;
+	 }
+       else
+	 {
+	   if (prev_cluster != FAT_FREE_CLUSTER)
+	     err = fat_get_next_cluster(prev_cluster, &cluster);
+	   else
+	     cluster = dn->start_cluster;
+	   if (cluster == FAT_EOC || cluster == FAT_FREE_CLUSTER)
+	     {
+	       dn->chain_complete = 1;
+	       if (create)
+		 continue;
+	       else
+		 break;
+	     }
+	 }
+       prev_cluster = cluster;
+       offs++;
+       if (offs == CLUSTERS_PER_TABLE)
+	 {
+	   offs = 0;
+	   err = allocate_new_table(&table);
+	   if (err)
+	     break;
+	 }
+       table->cluster[offs] = cluster;
+       dn->length_of_chain++;
+       left--;
+     }
+
+   if (dn->length_of_chain << log2_bytes_per_cluster > node->allocsize)
+     node->allocsize = dn->length_of_chain << log2_bytes_per_cluster;
+   spin_unlock(&dn->chain_extension_lock);
+   return err;
+}
+   
+/* Translate file-relative cluster index CLUSTER of NODE into the disk
+   cluster holding it and return that in DISK_CLUSTER.  A cluster that
+   does not exist yet is created if CREATE is true, else EINVAL results.  */
+error_t
+fat_getcluster (struct node *node, cluster_t cluster, int create,
+		cluster_t *disk_cluster)
+{
+  struct cluster_chain *table;
+  cluster_t skip;
+
+  if (cluster >= node->dn->length_of_chain)
+    {
+      error_t err = fat_extend_chain (node, cluster, create);
+      if (err)
+	return err;
+      if (cluster >= node->dn->length_of_chain)
+	{
+	  assert (!create);
+	  return EINVAL;
+	}
+    }
+
+  /* Each table caches CLUSTERS_PER_TABLE entries; walk to the right one.  */
+  table = node->dn->first;
+  for (skip = cluster >> LOG2_CLUSTERS_PER_TABLE; skip > 0; skip--)
+    {
+      assert (table);
+      table = table->next;
+    }
+  assert (table);
+  *disk_cluster = table->cluster[cluster & (CLUSTERS_PER_TABLE - 1)];
+  return 0;
+}
+
+/* Cut the cluster chain of NODE down to its first CLUSTERS_TO_KEEP
+   clusters.  The clusters beyond that are marked free in the FAT and
+   the cached chain tables are shortened to match.  Must not be used
+   on a FAT12/16 root directory, nor to empty a FAT32 root directory.  */
+void
+fat_truncate_node (struct node *node, cluster_t clusters_to_keep)
+{
+  struct disknode *dn = node->dn;
+  struct cluster_chain *next;
+  struct cluster_chain *keep = 0;	/* Last table that stays in use.  */
+  cluster_t count, offs, pos;
+
+  /* The root dir of a FAT12/16 fs is of fixed size, while the root
+     dir of a FAT32 fs must never cease to exist.  */
+  assert (! (((fat_type == FAT12 || fat_type == FAT16) && node == diskfs_root_node)
+	     || (fat_type == FAT32 && node == diskfs_root_node && clusters_to_keep == 0)));
+
+  /* Expand the cluster chain, because we have to know the complete tail.  */
+  fat_extend_chain (node, FAT_EOC, 0);
+  if (clusters_to_keep == dn->length_of_chain)
+    return;
+  assert (clusters_to_keep < dn->length_of_chain);
+
+  /* Truncation happens here.  */
+  next = dn->first;
+  if (clusters_to_keep == 0)
+    {
+      /* Deallocate the complete file.  */
+      dn->start_cluster = 0;
+      pos = offs = 0;
+    }
+  else
+    {
+      count = (clusters_to_keep - 1) >> LOG2_CLUSTERS_PER_TABLE;
+      offs = (clusters_to_keep - 1) & (CLUSTERS_PER_TABLE - 1);
+      while (count-- > 0)
+	{
+	  assert (next);
+	  next = next->next;
+	}
+      assert (next);
+      keep = next;
+      fat_write_next_cluster (next->cluster[offs++], FAT_EOC);
+      pos = clusters_to_keep;
+    }
+
+  /* Purge dangling clusters. If we die here, scandisk will have to
+     clean up the remains.  */
+  while (pos < dn->length_of_chain)
+    {
+      if (offs == CLUSTERS_PER_TABLE)
+	{
+	  offs = 0;
+	  next = next->next;
+	  assert(next);
+	}
+      fat_write_next_cluster(next->cluster[offs++], 0);
+      pos++;
+    }
+
+  /* Unhook the unused tables before freeing them, so that the cached
+     chain has no dangling pointers and its length is right again.  */
+  next = keep ? keep->next : dn->first;
+  if (keep)
+    keep->next = 0;
+  else
+    dn->first = 0;
+  dn->last = keep;
+  dn->length_of_chain = clusters_to_keep;
+  while (next)
+    {
+      struct cluster_chain *next_next = next->next;
+      free (next);
+      next = next_next;
+    }
+}
+
+
+/* Count the number of free clusters in the FAT.  The FAT is only
+   scanned if diskfs_catch_exception returns 0.  */
+int
+fat_get_freespace (void)
+{
+  int free_clusters = 0;
+  error_t err;
+
+  err = diskfs_catch_exception ();
+  if (!err)
+    {
+      /* Data clusters occupy the FAT entries 2 to nr_of_clusters + 1.  */
+      cluster_t cluster = 2;
+      while (cluster < nr_of_clusters + 2)
+	{
+	  cluster_t entry;
+	  fat_get_next_cluster (cluster++, &entry);
+	  if (entry == FAT_FREE_CLUSTER)
+	    free_clusters++;
+	}
+    }
+  diskfs_end_catch_exception ();
+
+  return free_clusters;
+}
+
+
+/* Convert the 11 byte, space padded 8.3 directory entry name NAME into
+   a NUL terminated "base.ext" string in FILE; the dot is dropped when
+   nothing follows it.  FILE must be a buffer with 13 characters.  */
+void fat_to_unix_filename(const char *name, char *file)
+{
+  char *out = file;
+  int in_ext = 0;
+  int i;
+
+  for (i = 0; i < 11; i++)
+    {
+      if (name[i] != ' ')
+	{
+	  *out++ = name[i];
+	  /* The base name is full; the extension starts right after.  */
+	  if (i == 7)
+	    {
+	      *out++ = '.';
+	      in_ext = 1;
+	    }
+	  continue;
+	}
+
+      /* A space after the separator ends the name.  */
+      if (in_ext)
+	break;
+
+      /* First space in the base name: emit the separator and skip the
+	 rest of the padding up to the extension field.  */
+      *out++ = '.';
+      in_ext = 1;
+      while (i < 7 && name[i + 1] == ' ')
+	i++;
+    }
+
+  /* Drop the separator again if nothing followed it.  */
+  if (in_ext && out[-1] == '.')
+    out--;
+  *out = '\0';
+}
+
+/* Convert the first UL characters of the unix filename UN into the 11
+   byte, space padded, upper case 8.3 form and store it in FN (no NUL
+   is appended).  Overlong base names and extensions are truncated.  */
+void
+fat_from_unix_filename(char *fn, const char *un, int ul)
+{
+  int fp = 0;
+  int up = 0;
+  int ext = 0;
+
+  while (fp < 11)
+    {
+      if (up == ul)
+	{
+	  /* We parsed the complete unix filename.  */
+	  while (fp < 11)
+	    fn[fp++] = ' ';
+	}
+      else if (!ext)
+	{
+	  if (un[up] == '.')
+	    {
+	      /* Pad the base name and continue with the extension.  */
+	      while (fp < 8)
+		fn[fp++] = ' ';
+	      ext = 1;
+	      up++;
+	    }
+	  else if (fp == 8)
+	    {
+	      /* Base name is full: skip up to and including the dot.  */
+	      while (un[up++] != '.' && up < ul);
+	      ext = 1;
+	    }
+	  else
+	    fn[fp++] = toupper ((unsigned char) un[up++]);
+	}
+      else if (un[up] == '.')
+	{
+	  /* A second dot ends the extension.  */
+	  while (fp < 11)
+	    fn[fp++] = ' ';
+	}
+      else
+	fn[fp++] = toupper ((unsigned char) un[up++]);
+    }
+}
+
+
+/* Return Epoch-based time from a MSDOS time/date pair.  DATE and TIME
+   point to the 16-bit on-disk fields; the result is stored in TS.  */
+void
+fat_to_epoch (char *date, char *time, struct timespec *ts)
+{
+  struct tm tm;
+  unsigned int d = read_word ((unsigned char *) date);
+  unsigned int t = read_word ((unsigned char *) time);
+
+  /* Date format:
+     Bits 0-4: Day of month (1-31).
+     Bits 5-8: Month of year (1-12).
+     Bits 9-15: Count of years from 1980 (0-127).
+     Time format:
+     Bits 0-4: 2-second count (0-29).
+     Bits 5-10: Minutes (0-59).
+     Bits 11-15: Hours (0-23).  */
+  memset (&tm, 0, sizeof tm);
+  tm.tm_year = (d >> 9) + 80;
+  tm.tm_mon = ((d >> 5) & 0xf) - 1;
+  tm.tm_mday = d & 0x1f;
+  tm.tm_hour = t >> 11;
+  tm.tm_min = (t >> 5) & 0x3f;
+  tm.tm_sec = (t & 0x1f) << 1;	/* Stored as a count of 2 seconds.  */
+
+  ts->tv_sec = timegm (&tm);
+  ts->tv_nsec = 0;
+}
+
+/* Return MSDOS time/date pair from Epoch-based time.  Times outside
+   the representable range (1980 through 2107) are clamped to it.  */
+void
+fat_from_epoch (char *date, char *time, time_t *tp)
+{
+  struct tm *tm;
+  unsigned int d = 1 | (1 << 5);	/* 1980-01-01, the earliest date.  */
+  unsigned int t = 0;
+  /* gmtime returns a pointer to static storage, hence the lock.  */
+  spin_lock(&epoch_to_time_lock);
+  tm = gmtime (tp);
+  if (tm && tm->tm_year > 207)
+    {
+      d = 31 | (12 << 5) | (127 << 9);
+      t = (23 << 11) | (59 << 5) | 29;
+    }
+  else if (tm && tm->tm_year >= 80)
+    {
+      /* Day, month, years since 1980; then seconds / 2, minutes, hours.  */
+      d = tm->tm_mday | ((tm->tm_mon + 1) << 5) | ((tm->tm_year - 80) << 9);
+      t = (tm->tm_hour << 11) | (tm->tm_min << 5) | (tm->tm_sec >> 1);
+    }
+  spin_unlock(&epoch_to_time_lock);
+  write_word((unsigned char *) date, d);
+  write_word((unsigned char *) time, t);
+}
diff --git a/fatfs/fat.h b/fatfs/fat.h
new file mode 100644
index 00000000..91e5a5cb
--- /dev/null
+++ b/fatfs/fat.h
@@ -0,0 +1,403 @@
+/* fat.h - Support for FAT filesystems interfaces.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Written by Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#ifndef FAT_H
+#define FAT_H
+
+/* Specification of the FAT12/16/32 filesystem format.  */
+
+/* Overview
+   --------
+
+   Any FAT fs consists of several regions, which follow immediately
+   after each other.
+
+   Reserved
+
+     The reserved region consists of the boot sector, and with it the
+     BIOS Parameter Block, which contains all necessary data about the
+     filesystem like sector size, number of clusters etc. It also
+     holds the filesystem info block.
+
+     The reserved region of FAT32 filesystems also holds a backup copy
+     of the boot sector at sector 6 (usually), followed by a backup
+     copy of the filesystem info sector.
+
+     The number of sectors occupied by the reserved region is stored
+     in the reserved region as well, in the word at offset 14
+     (reserved_sectors).
+
+   FAT
+
+     The FAT region contains the File Allocation Table, which is a
+     linked list of clusters occupied by each file or directory.
+     There might be multiple FAT tables in the FAT region, for
+     redundancy.
+
+     The number of FATs is stored in the reserved region, in the byte
+     at offset 16 (nr_of_fat_tables). The number of sectors per FAT is
+     stored in the word at offset 22 (sectors_per_fat_16) or, if this
+     is zero (as it is for FAT32), in the doubleword at offset 36
+     (sectors_per_fat_32).
+
+   Root Directory
+
+     In FAT12/16, the root directory entries allocate their own region
+     and are not accessed through the FAT.
+
+     The size of this region is determined by the word at offset 17
+     (nr_of_root_dirents). You have to multiply this with the nr of
+     bytes per entry, and divide through the number of bytes per
+     sector, rounding up.  On FAT32 filesystems, this region does not
+     exist, and nr_of_root_dirents is zero. The FAT32 root directory
+     is accessed through the FAT as any other directory is.
+
+   Data
+
+     The data region occupies the rest of the filesystem and stores
+     the actual file and directory data. It is separated into clusters,
+     which are indexed in the FAT.
+
+     The number of sectors of the whole filesystem (not only of this
+     region) is stored in the word at offset 19 (total_sectors_16)
+     or, if this is zero, in the doubleword at offset 32 (total_sectors_32).
+
+
+  NOTE that all meta data in a FAT filesystem is stored in little endian
+  format.
+
+*/
+
+/* The supported FAT types.  */
+
+enum fat { FAT12, FAT16, FAT32 };
+typedef enum fat fat_t;
+
+/* The FAT type is determined by the number of clusters in the data
+   region, and nothing else.  The maximal number of clusters for a
+   FAT12 and FAT16 respectively is defined here.  FAT32_BAD_CLUSTER
+   is only defined further down, which is fine for a macro.  */
+
+#define FAT12_MAX_NR_OF_CLUSTERS 4084
+#define FAT16_MAX_NR_OF_CLUSTERS 65524
+#define FAT32_MAX_NR_OF_CLUSTERS (FAT32_BAD_CLUSTER - 1)
+
+struct boot_sector
+{
+  /* Unused.  */
+  unsigned char jump_to_boot_code[3];      /*   0, typ. 0xeb 0x?? 0x90  */
+  unsigned char oem_name[8];               /*   3, typ. "MSWIN4.1"  */
+
+  /* Sector and Cluster size.
+     bytes_per_sector is usually 512, but 1024, 2048, 4096 are also allowed.
+     sectors_per_cluster is one of 1, 2, 4, 8, 16, 32, 64, 128.
+     Note that bytes per cluster (product of the two) must be <= 32768.  */
+  unsigned char bytes_per_sector[2];       /*  11 */
+  unsigned char sectors_per_cluster;       /*  13 */
+  
+  /* Size of the various regions.
+     reserved_sectors must not be zero and is typically 1 on FAT12/16
+     filesystems and 32 on FAT32 filesystems.
+     nr_of_fat_tables must not be zero and is typically 2.
+     nr_of_root_dirents must be zero on FAT32 filesystems.
+     For FAT12/16, the value multiplied with FAT_DIR_REC_LEN (32)
+     should always be a multiple of bytes_per_sector to retain
+     compatibility. For FAT16, 512 should be used.
+     total_sectors_16 contains the complete number of sectors if not zero.
+     If zero, the number of sectors is stored in total_sectors_32.  */
+  unsigned char reserved_sectors[2];       /*  14 */
+  unsigned char nr_of_fat_tables;          /*  16 */
+  unsigned char nr_of_root_dirents[2];     /*  17 */
+  unsigned char total_sectors_16[2];       /*  19 */
+
+  /* Media descriptor.
+     Allowed are values between 0xf0 and 0xff.
+     0xf8 is a fixed hardware (disk), 0xf0 denotes a removable media.
+     Must be the same as the first byte in the FAT (compatibility
+     with DOS 1.x).  */
+  unsigned char media_descriptor;          /*  21 */
+
+  /* Size of one FAT.
+     On FAT32 systems, this value must be zero and sectors_per_fat_32
+     used instead.  */
+  unsigned char sectors_per_fat_16[2];     /*  22 */
+
+  /* Disk geometry. Unused.  */
+  unsigned char sectors_per_track[2];      /*  24 */
+  unsigned char nr_of_heads[2];            /*  26 */
+  unsigned char nr_of_hidden_sectors[4];   /*  28 */
+
+  /* See total_sectors_16.  */
+  unsigned char total_sectors_32[4];       /*  32 */
+
+  /* FAT specific information.
+     Starting with offset 36, FAT12/16 filesystems differ from FAT32
+     filesystems.  */
+  union
+  {
+    struct
+    {
+      unsigned char drive;                 /*  36 */
+      unsigned char reserved;              /*  37 */
+
+      /* Boot signature.
+	 Value is 0x29.
+	 Indicates that the following three fields
+	 are present.  */
+      unsigned char boot_signature;        /*  38 */
+
+      /* Identifier.
+	 serial is a unique identifier for removable media.
+	 label is the filesystem label, which must match the label
+	 stored in the root directory entry which has FAT_DIR_ATTR_LABEL
+	 set. If no name is specified, the content is "NO NAME    ".
+	 fs_type: One of "FAT12   ", "FAT16   ", "FAT     " (8 bytes).
+	 Don't use.  */
+      unsigned char serial[4];             /*  39 */
+      unsigned char label[11];             /*  43 */
+      unsigned char fs_type[8];            /*  54 */
+    } fat;
+    struct
+    {
+      /* See sectors_per_fat_16.  */
+      unsigned char sectors_per_fat_32[4]; /*  36 */
+
+      /* Extension flags.
+	 Bits 0-3: Zero based nr of active FAT.
+	 Bit 7: If 0, all FATs are active and should be kept up to date.
+	        If 1, only the active FAT (see bits 0-3) should be used.
+	 The rest of the bits are reserved.  */
+      unsigned char extension_flags[2];    /*  40 */
+
+      /* Filesystem version.
+	 The high byte is the major number, the low byte the minor version.
+	 Don't mount if either version number is higher than known versions. */
+      unsigned char fs_version[2];         /*  42 */
+
+      /* Root cluster.
+	 The cluster where the root directory starts.  */
+      unsigned char root_cluster[4];       /*  44 */
+
+      /* Filesystem Info sector.
+	 The sector number of the filesystem info block in the
+	 reserved area.  */
+      unsigned char fs_info_sector[2];     /*  48 */
+
+      /* Backup boot sector.
+	 The sector of the backup copy of the boot sector.
+	 Should be 6, so it can be used even if this field is
+	 corrupted.  */
+      unsigned char backup_boot_sector[2]; /*  50 */
+      unsigned char reserved1[12];         /*  52 */
+
+      /* See fat structure above, with the following exception:
+	 fs_type is "FAT32   ".  */
+      unsigned char drive_number;          /*  64 */
+      unsigned char reserved2;             /*  65 */
+      unsigned char boot_signature;        /*  66 */
+      unsigned char serial[4];             /*  67 */
+      unsigned char label[11];             /*  71 */
+      unsigned char fs_type[8];            /*  82 */
+    } fat32;
+  } compat;
+  unsigned char unused[420];               /*  90 */
+
+  /* Expected ID at offset 510, read as a little endian word.
+   */
+#define BOOT_SECTOR_ID 0xaa55
+
+  unsigned char id[2];                     /* 510 */
+};
+
+/* File System Info Block.  All fields are byte arrays, like in the
+   boot sector; the field sizes below add up to 512 bytes.  */
+#define FAT_FS_INFO_LEAD_SIGNATURE		0x41615252L
+#define FAT_FS_INFO_STRUCT_SIGNATURE		0x61417272L
+#define FAT_FS_INFO_TRAIL_SIGNAURE		0xaa550000L
+#define FAT_FS_NR_OF_FREE_CLUSTERS_UNKNOWN	0xffffffffL
+#define FAT_FS_NEXT_FREE_CLUSTER_UNKNOWN	0xffffffffL
+
+struct fat_fs_info
+{
+  unsigned char lead_signature[4];         /*   0 */
+  unsigned char reserved1[480];            /*   4 */
+  unsigned char struct_signature[4];       /* 484 */
+  unsigned char nr_of_free_clusters[4];    /* 488 */
+  unsigned char next_free_cluster[4];      /* 492 */
+  unsigned char reserved2[12];             /* 496 */
+  unsigned char trail_signature[4];        /* 508 */
+};
+
+/* File Allocation Table, special entries.  */
+
+#define FAT_FREE_CLUSTER	0
+
+#define FAT12_BAD_CLUSTER	0x0ff7
+#define FAT16_BAD_CLUSTER	0xfff7
+#define FAT32_BAD_CLUSTER	0x0ffffff7L
+#define FAT_BAD_CLUSTER		FAT32_BAD_CLUSTER  /* Generic in-memory value.  */
+
+#define FAT12_EOC	0x0ff8  /* This and any larger entry ends a chain.  */
+#define FAT16_EOC	0xfff8
+#define FAT32_EOC	0x0ffffff8
+#define FAT_EOC		FAT32_EOC                  /* Generic in-memory value.  */
+
+/* Directories.  */
+
+#define FAT_DIR_REC_LEN		32                 /* Bytes per directory entry.  */
+#define FAT_DIR_RECORDS(x)	FAT_DIR_REC_LEN    /* Something else for vfat.  */
+
+#define FAT_DIR_ATTR_RDONLY	0x01
+#define FAT_DIR_ATTR_HIDDEN	0x02
+#define FAT_DIR_ATTR_SYSTEM	0x04
+#define FAT_DIR_ATTR_LABEL	0x08
+#define FAT_DIR_ATTR_DIR	0x10
+#define FAT_DIR_ATTR_ARCHIVE	0x20
+#define FAT_DIR_ATTR_LONGNAME	(FAT_DIR_ATTR_RDONLY | FAT_DIR_ATTR_HIDDEN \
+				| FAT_DIR_ATTR_SYSTEM | FAT_DIR_ATTR_LABEL)
+
+#define FAT_DIR_NAME_LAST	'\x00'
+#define FAT_DIR_NAME_DELETED	'\xe5'
+
+/* If the first character is this, replace it with FAT_DIR_NAME_DELETED
+   after checking for it.  */
+#define FAT_DIR_NAME_REPLACE_DELETED '\x05'
+
+#define FAT_DIR_NAME_DOT	".          "
+#define FAT_DIR_NAME_DOTDOT	"..         "
+
+struct dirrect                             /* One directory entry.  */
+{
+  unsigned char name[11];                  /*   0, 8.3 name, space padded  */
+  unsigned char attribute;                 /*  11, FAT_DIR_ATTR_* bits  */
+  unsigned char reserved;                  /*  12 */
+  unsigned char creation_time_centiseconds;/*  13 */
+  unsigned char creation_time[2];          /*  14 */
+  unsigned char creation_date[2];          /*  16 */
+  unsigned char last_access_date[2];       /*  18 */
+  unsigned char first_cluster_high[2];     /*  20 */
+  unsigned char write_time[2];             /*  22 */
+  unsigned char write_date[2];             /*  24 */
+  unsigned char first_cluster_low[2];      /*  26 */
+  unsigned char file_size[4];              /*  28, ends at FAT_DIR_REC_LEN  */
+};
+
+#define FAT_NAME_MAX 12   /* 8 + '.' + 3.  VFAT: 255 */
+
+extern vm_offset_t first_data_byte;
+extern size_t bytes_per_cluster;
+
+/* A cluster number.  */
+typedef unsigned long cluster_t;
+
+#define LOG2_CLUSTERS_PER_TABLE 10
+#define CLUSTERS_PER_TABLE (1 << LOG2_CLUSTERS_PER_TABLE)
+
+struct cluster_chain   /* One table of a file's cached cluster chain.  */
+{
+  struct cluster_chain *next;              /* Following table, or 0.  */
+  cluster_t cluster[CLUSTERS_PER_TABLE];   /* Disk clusters, in file order.  */
+};
+
+/* Prototyping.  */
+void fat_read_sblock (void);
+void fat_to_epoch (char *, char *, struct timespec *);  /* date, time, ts */
+void fat_from_epoch (char *, char *, time_t *);         /* date, time, tp */
+error_t fat_getcluster (struct node *, cluster_t, int, cluster_t *);
+void fat_truncate_node (struct node *, cluster_t);      /* clusters to keep */
+error_t fat_extend_chain (struct node *, cluster_t, int);
+int fat_get_freespace (void);                           /* in clusters */
+
+/* Unprocessed superblock.  */
+extern struct boot_sector *sblock;
+
+/* Processed sblock info.  */
+extern fat_t fat_type;
+extern size_t bytes_per_sector;
+extern size_t log2_bytes_per_sector;
+extern size_t sectors_per_cluster;
+extern size_t bytes_per_cluster;
+extern unsigned int log2_bytes_per_cluster;
+extern size_t sectors_per_fat;
+extern size_t total_sectors;
+extern size_t nr_of_root_dir_sectors;
+extern size_t first_root_dir_byte;
+extern size_t first_data_sector;
+extern vm_offset_t first_data_byte;
+extern size_t first_fat_sector;
+extern cluster_t nr_of_clusters;
+        
+/* Numeric conversions for these fields.  */
+#include <endian.h>
+
+/* Return the little endian 32-bit value stored at ADDR.  The value
+   is assembled from single bytes, so neither the byte order of the
+   host nor the alignment of ADDR matters.  The casts keep the shift
+   of the top byte out of the sign bit of a promoted int.  */
+static inline unsigned int 
+read_dword (unsigned char *addr)
+{
+  return (unsigned int) addr[0]
+    | ((unsigned int) addr[1] << 8)
+    | ((unsigned int) addr[2] << 16)
+    | ((unsigned int) addr[3] << 24);
+}
+
+/* Return the little endian 16-bit value stored at ADDR.  The value
+   is assembled from single bytes, so neither the byte order of the
+   host nor the alignment of ADDR matters.  */
+static inline unsigned int
+read_word (unsigned char *addr)
+{
+  unsigned int low = addr[0];
+  unsigned int high = addr[1];
+
+  return low | (high << 8);
+}
+
+/* Store VALUE as a little endian 32-bit quantity at ADDR.  Written
+   byte by byte, so it works for any host byte order and any
+   alignment of ADDR.  */
+static inline void 
+write_dword (unsigned char *addr, unsigned int value)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      addr[i] = value & 0xff;
+      value >>= 8;
+    }
+}
+
+/* Store the low 16 bits of VALUE in little endian order at ADDR.
+   Written byte by byte, so it works for any host byte order and any
+   alignment of ADDR.  */
+static inline void
+write_word (unsigned char *addr, unsigned int value)
+{
+  unsigned char low = value & 0xff;
+  unsigned char high = (value >> 8) & 0xff;
+
+  addr[0] = low;
+  addr[1] = high;
+}
+
+#endif /* FAT_H */
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
new file mode 100644
index 00000000..f46695ba
--- /dev/null
+++ b/fatfs/fatfs.h
@@ -0,0 +1,121 @@
+/* fatfs.h - Interface for fatfs.
+   Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <hurd/diskfs.h>
+#include <hurd/diskfs-pager.h>
+#include <hurd/store.h>
+
+#include "fat.h"
+#include "virt-inode.h"
+
+/* There is no such thing as an inode in this format, all such information
+   being recorded in the directory entry.  The inode numbers we report
+   are therefore generated by the virtual inode management routines
+   (see virt-inode.h).  When messing around with the directory entry,
+   hold the DIRENT_LOCK.  */
+
+/* Format specific per-node state, reachable from a struct node
+   through its DN member.  */
+struct disknode
+{
+  /* First cluster of the node's data; read_node assembles it from the
+     first_cluster_high and first_cluster_low fields of the directory
+     entry.  */
+  cluster_t start_cluster;
+
+  /* Links on hash list.  HNEXT is the next node in the same bucket of
+     the inode hash table; HPREVP points at the pointer that refers to
+     this node (the bucket head or the previous node's HNEXT).  */
+  struct node *hnext, **hprevp;
+
+  /* The inode as returned by virtual inode management routines.  */
+  inode_t inode;
+
+  /* Taken by read_node (reader) and write_node (writer) while the
+     directory entry is accessed.  */
+  struct rwlock dirent_lock;
+    
+  char *link_target;            /* For S_ISLNK.  */
+
+  /* read_node resets both of these to zero; diskfs_node_norefs frees
+     TRANSLATOR if it is set.  */
+  size_t translen;
+  char *translator;
+
+  /* Lock to hold while fiddling with this inode's block allocation
+     info.  */
+  struct rwlock alloc_lock;
+  /* Lock to hold while extending this inode's block allocation info.
+     Hold only if you hold readers alloc_lock, then you don't need to
+     hold it if you hold writers alloc_lock already.  */
+  spin_lock_t chain_extension_lock;
+  /* Head and tail of the list of cluster_chain records for this node;
+     diskfs_node_norefs frees the list by walking from FIRST.  */
+  struct cluster_chain *first;
+  struct cluster_chain *last;
+  /* NOTE(review): presumably the number of clusters currently recorded
+     in the list above, and whether the list reaches the end of the
+     file's chain -- confirm against fat.c.  Both start out as zero.  */
+  cluster_t length_of_chain;
+  int chain_complete;
+
+  /* This file's pager.  */
+  struct pager *pager;
+
+  /* Index to start a directory lookup at.  */
+  int dir_idx;
+};
+
+/* Per-pager bookkeeping.  NOTE(review): the users of this structure
+   live in pager.c, which is not visible here; the field descriptions
+   below are assumptions to be confirmed there.  */
+struct user_pager_info
+{
+  /* Presumably the node whose contents this pager provides.  */
+  struct node *node;
+  /* Which kind of object the pager backs; presumably the FAT image
+     versus the data of a file.  */
+  enum pager_type
+  {
+    FAT,
+    FILE_DATA,
+  } type;
+  /* Presumably the maximum protection granted for mappings.  */
+  vm_prot_t max_prot;
+};
+
+/* The physical media.  */
+extern struct store *store;
+
+/* The UID and GID for all files in the filesystem.  */
+extern uid_t fs_uid;
+extern gid_t fs_gid;
+
+/* Mapped image of the FAT.  */
+extern void *fat_image;
+
+/* Handy source of zeroes.  */
+extern vm_address_t zerocluster;
+
+extern struct dirrect dr_root_node;
+
+
+#define round_cluster(offs)					\
+  ((((offs) + bytes_per_cluster - 1)				\
+    >> log2_bytes_per_cluster) << log2_bytes_per_cluster)
+
+/* Byte offset of the first byte of cluster CLUSTER: FIRST_DATA_BYTE
+   plus (CLUSTER - 2) clusters.  CLUSTER is parenthesized so that
+   compound argument expressions are evaluated as a whole before the
+   subtraction.  */
+#define fat_first_cluster_byte(cluster) \
+ (first_data_byte + (((cluster) - 2) << log2_bytes_per_cluster))
+
+void drop_pager_softrefs (struct node *);
+void allow_pager_softrefs (struct node *);
+void create_fat_pager (void);
+
+void flush_node_pager (struct node *node);
+
+void write_all_disknodes ();
+
+struct node *ifind (ino_t inum);
+
+error_t fat_get_next_cluster (cluster_t cluster, cluster_t *next_cluster);
+void fat_to_unix_filename (const char *, char *);
+
+error_t diskfs_cached_lookup_in_dirbuf (int cache_id, struct node **npp,
+					vm_address_t buf);
+void refresh_node_stats (void);
+
diff --git a/fatfs/inode.c b/fatfs/inode.c
new file mode 100644
index 00000000..4119551a
--- /dev/null
+++ b/fatfs/inode.c
@@ -0,0 +1,764 @@
+/* inode.c - Inode management routines.
+   Copyright (C) 1994,95,96,97,98,99, 2000, 2002 Free Software Foundation, Inc.
+   Modified for fatfs by Marcus Brinkmann <marcus@gnu.org>
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <string.h>
+#include "fatfs.h"
+
+/* These flags aren't actually defined by a header file yet, so
+   temporarily disable them if necessary.  */
+#ifndef UF_APPEND
+#define UF_APPEND 0
+#endif
+#ifndef UF_NODUMP
+#define UF_NODUMP 0
+#endif
+#ifndef UF_IMMUTABLE
+#define UF_IMMUTABLE 0
+#endif
+
+#define INOHSZ  512
+#if     ((INOHSZ&(INOHSZ-1)) == 0)
+#define INOHASH(ino)    ((ino)&(INOHSZ-1))
+#else
+#define INOHASH(ino)    (((unsigned)(ino))%INOHSZ)
+#endif
+
+static struct node *nodehash[INOHSZ];
+
+static error_t read_node (struct node *np, vm_address_t buf);
+
+/* Reset every bucket of the inode hash table to the empty list.  */
+void
+inode_init ()
+{
+  struct node **bucket = nodehash;
+  struct node **end = nodehash + INOHSZ;
+
+  while (bucket < end)
+    *bucket++ = 0;
+}
+
+/* Fetch inode INUM, set *NPP to the node structure; gain one user
+   reference and lock the node.
+
+   If a node with cache id INUM is already in the hash table it is
+   returned with one more reference.  Otherwise a new node is created,
+   entered into the hash table and filled in by read_node.  Returns 0
+   on success, ENOMEM if the node cannot be allocated, or the error
+   from read_node.  */
+error_t
+diskfs_cached_lookup (ino64_t inum, struct node **npp)
+{
+  error_t err;
+  struct node *np;
+  struct disknode *dn;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+  for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext)
+    if (np->cache_id == inum)
+      {
+        np->references++;
+        spin_unlock (&diskfs_node_refcnt_lock);
+        mutex_lock (&np->lock);
+        *npp = np;
+        return 0;
+      }
+
+  /* Format specific data for the new node.  */
+  dn = malloc (sizeof (struct disknode));
+  if (! dn)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return ENOMEM;
+    }
+  /* Give every field a defined value.  In particular TRANSLATOR must
+     be null, because diskfs_node_norefs frees it, and read_node can
+     fail before it gets around to clearing it.  */
+  dn->start_cluster = 0;
+  dn->hnext = 0;
+  dn->hprevp = 0;
+  dn->link_target = 0;
+  dn->translen = 0;
+  dn->translator = 0;
+  dn->pager = 0;
+  dn->first = 0;
+  dn->last = 0;
+  dn->length_of_chain = 0;
+  dn->chain_complete = 0;
+  dn->dir_idx = 0;
+  dn->chain_extension_lock = SPIN_LOCK_INITIALIZER;
+  rwlock_init (&dn->alloc_lock);
+  rwlock_init (&dn->dirent_lock);
+  
+  /* Create the new node.  */
+  np = diskfs_make_node (dn);
+  if (! np)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      free (dn);
+      return ENOMEM;
+    }
+  np->cache_id = inum;
+  np->dn->inode = vi_lookup(inum);
+
+  mutex_lock (&np->lock);
+  
+  /* Put NP in NODEHASH.  */
+  dn->hnext = nodehash[INOHASH(inum)];
+  if (dn->hnext)
+    dn->hnext->dn->hprevp = &dn->hnext;
+  dn->hprevp = &nodehash[INOHASH(inum)];
+  nodehash[INOHASH(inum)] = np;
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+  
+  /* Get the contents of NP off disk.  */
+  err = read_node (np, 0);
+
+  /* NOTE(review): on failure the node stays locked and in the hash
+     table, exactly as before; *NPP is left untouched.  */
+  if (err)
+    return err;
+
+  *npp = np;
+  return 0;
+}
+
+/* Fetch inode INUM, set *NPP to the node structure;
+   gain one user reference and lock the node.
+   On the way, use BUF as the directory file map.
+
+   BUF is handed to read_node, which reads the node's directory entry
+   from it instead of mapping the containing directory itself.
+   Returns 0 on success, ENOMEM if the node cannot be allocated, or
+   the error from read_node.  */
+error_t
+diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf)
+{
+  error_t err;
+  struct node *np;
+  struct disknode *dn;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+  for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext)
+    if (np->cache_id == inum)
+      {
+        np->references++;
+        spin_unlock (&diskfs_node_refcnt_lock);
+        mutex_lock (&np->lock);
+        *npp = np;
+        return 0;
+      }
+
+  /* Format specific data for the new node.  */
+  dn = malloc (sizeof (struct disknode));
+  if (! dn)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return ENOMEM;
+    }
+  /* Give every field a defined value.  In particular TRANSLATOR must
+     be null, because diskfs_node_norefs frees it, and read_node can
+     fail before it gets around to clearing it.  */
+  dn->start_cluster = 0;
+  dn->hnext = 0;
+  dn->hprevp = 0;
+  dn->link_target = 0;
+  dn->translen = 0;
+  dn->translator = 0;
+  dn->pager = 0;
+  dn->first = 0;
+  dn->last = 0;
+  dn->length_of_chain = 0;
+  dn->chain_complete = 0;
+  dn->dir_idx = 0;
+  dn->chain_extension_lock = SPIN_LOCK_INITIALIZER;
+  rwlock_init (&dn->alloc_lock);
+  rwlock_init (&dn->dirent_lock);
+  
+  /* Create the new node.  */
+  np = diskfs_make_node (dn);
+  if (! np)
+    {
+      spin_unlock (&diskfs_node_refcnt_lock);
+      free (dn);
+      return ENOMEM;
+    }
+  np->cache_id = inum;
+  np->dn->inode = vi_lookup(inum);
+
+  mutex_lock (&np->lock);
+  
+  /* Put NP in NODEHASH.  */
+  dn->hnext = nodehash[INOHASH(inum)];
+  if (dn->hnext)
+    dn->hnext->dn->hprevp = &dn->hnext;
+  dn->hprevp = &nodehash[INOHASH(inum)];
+  nodehash[INOHASH(inum)] = np;
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+  
+  /* Get the contents of NP off disk.  */
+  err = read_node (np, buf);
+
+  /* NOTE(review): on failure the node stays locked and in the hash
+     table, exactly as before; *NPP is left untouched.  */
+  if (err)
+    return err;
+
+  *npp = np;
+  return 0;
+}
+
+/* Return the node with cache id INUM, which the caller must already
+   hold a reference on; no new reference is taken.  Not finding the
+   node is treated as a programming error.  */
+struct node *
+ifind (ino_t inum)
+{
+  struct node *np;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+
+  /* Walk the bucket until we hit INUM or run off the end.  */
+  np = nodehash[INOHASH(inum)];
+  while (np && np->cache_id != inum)
+    np = np->dn->hnext;
+
+  if (np)
+    {
+      assert (np->references);
+      spin_unlock (&diskfs_node_refcnt_lock);
+      return np;
+    }
+
+  assert (0);
+}
+
+/* NP has lost its last reference: unlink it from the hash table,
+   release everything hanging off its disknode, and free both.  */
+void
+diskfs_node_norefs (struct node *np)
+{
+  struct disknode *dn = np->dn;
+  struct cluster_chain *chain, *next;
+
+  /* Unlink NP from its hash bucket.  */
+  *dn->hprevp = dn->hnext;
+  if (dn->hnext)
+    dn->hnext->dn->hprevp = dn->hprevp;
+
+  /* Release the cached cluster chain.  */
+  for (chain = dn->first; chain; chain = next)
+    {
+      next = chain->next;
+      free (chain);
+    }
+
+  /* free (0) is a no-op, so no test is needed.  */
+  free (dn->translator);
+
+  assert (!dn->pager);
+
+  free (dn);
+  free (np);
+}
+
+/* The last hard reference to a node has gone away; arrange to have
+   all the weak references dropped that can be.  This simply forwards
+   NP to drop_pager_softrefs.  */
+void
+diskfs_try_dropping_softrefs (struct node *np)
+{
+  drop_pager_softrefs (np);
+}
+
+/* The last hard reference to a node has gone away.  Nothing is done
+   for NP here.  */
+void
+diskfs_lost_hardrefs (struct node *np)
+{
+}
+
+/* A new hard reference to a node has been created; it's now OK to
+   have unused weak references.  This simply forwards NP to
+   allow_pager_softrefs.  */
+void
+diskfs_new_hardrefs (struct node *np)
+{
+  allow_pager_softrefs (np);
+}
+
+/* Read stat information out of the directory entry.
+
+   Fill NP->dn_stat, NP->allocsize and NP->dn->start_cluster from the
+   directory entry of NP.  The entry is located through the virtual
+   inode key of NP: a key with a zero dir_inode selects the synthetic
+   entry DR_ROOT_NODE, otherwise the entry lies dir_offset bytes into
+   the containing directory.  If BUF is non-zero it is used as the
+   mapped contents of that directory; if it is zero the directory is
+   looked up and mapped here, and unmapped and released again before
+   returning.
+
+   Returns 0 on success, or the error from looking up or mapping the
+   directory or from fat_extend_chain.  */
+static error_t
+read_node (struct node *np, vm_address_t buf)
+{
+  /* XXX This needs careful investigation */
+  error_t err = 0;
+  struct stat *st = &np->dn_stat;
+  struct disknode *dn = np->dn;
+  struct dirrect *dr;
+  struct node *dp = 0;
+  struct vi_key vk = vi_key(np->dn->inode);
+  vm_prot_t prot = VM_PROT_READ;
+  memory_object_t memobj;
+  vm_size_t buflen = 0;
+  int our_buf = 0;
+
+  if (vk.dir_inode == 0)
+    dr = &dr_root_node;
+  else
+    {
+      if (buf == 0)
+	{
+	  err = diskfs_cached_lookup (vk.dir_inode, &dp);
+	  if (err)
+	    return err;
+      
+	  /* Map in the directory contents. */
+	  memobj = diskfs_get_filemap (dp, prot);
+      
+	  if (memobj == MACH_PORT_NULL)
+	    {
+	      /* Save errno first; diskfs_nput might change it.  */
+	      err = errno;
+	      diskfs_nput (dp);
+	      return err;
+	    }
+
+	  buflen = round_page (dp->dn_stat.st_size);
+	  err = vm_map (mach_task_self (),
+			&buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+	  mach_port_deallocate (mach_task_self (), memobj);
+	  if (err)
+	    {
+	      /* Without a mapping there is no directory entry to
+		 read.  */
+	      diskfs_nput (dp);
+	      return err;
+	    }
+	  our_buf = 1;
+	}
+      
+      dr = (struct dirrect *) (buf + vk.dir_offset);
+    }
+
+  st->st_fstype = FSTYPE_MSLOSS;
+  st->st_fsid = getpid ();
+  st->st_ino = np->cache_id;
+  st->st_gen = 0;
+  st->st_rdev = 0;
+
+  st->st_nlink = 1;
+  st->st_uid = fs_uid;
+  st->st_gid = fs_gid;
+
+  np->dn->translator = 0;
+  np->dn->translen = 0;
+
+  st->st_flags = 0;
+
+  /* If we are called for a newly allocated node that has no directory
+     entry yet, only set a minimal amount of data until the dirent is
+     created (and we get called a second time?).  */
+  /* We will avoid this by overriding the relevant functions.
+     if (dr == (void *)1)
+     return 0;
+  */
+
+  rwlock_reader_lock(&np->dn->dirent_lock);
+
+  dn->start_cluster = (read_word (dr->first_cluster_high) << 16)
+    + read_word (dr->first_cluster_low);
+
+  if (dr->attribute & FAT_DIR_ATTR_DIR)
+    {
+      st->st_mode = S_IFDIR | 0777;
+      /* When we read in the node the first time, diskfs_root_node is
+	 zero.  */
+      if (diskfs_root_node == 0 ||
+	  (np == diskfs_root_node && (fat_type == FAT12 || fat_type == FAT16)))
+	{
+	  st->st_size = read_dword (dr->file_size);
+	  np->allocsize = nr_of_root_dir_sectors << log2_bytes_per_sector;
+	}
+      else
+	{
+	  /* The size of the directory is taken from the allocation
+	     size left behind by extending the chain to its end.  */
+	  np->allocsize = 0;
+	  rwlock_reader_lock(&dn->alloc_lock);
+	  err = fat_extend_chain (np, FAT_EOC, 0);
+	  rwlock_reader_unlock(&dn->alloc_lock);
+	  if (err)
+	    goto out;
+	  st->st_size = np->allocsize;
+	}
+    }
+  else
+    {
+      unsigned offset;
+      st->st_mode = S_IFREG | 0666;
+      st->st_size = read_dword (dr->file_size);
+      np->allocsize = np->dn_stat.st_size;
+
+      /* Round up to a cluster multiple.  */
+      offset = np->allocsize & (bytes_per_cluster - 1);
+      if (offset > 0)
+	np->allocsize += bytes_per_cluster - offset;
+    }
+  if (dr->attribute & FAT_DIR_ATTR_RDONLY)
+    st->st_mode &= ~0222;
+
+  {
+    /* Only the write time stamp of the entry is used; it stands in
+       for all three times.  */
+    struct timespec ts;
+    fat_to_epoch (dr->write_date, dr->write_time, &ts);
+    st->st_ctime = st->st_mtime = st->st_atime = ts.tv_sec;
+    /* Nanoseconds to microseconds.  */
+    st->st_ctime_usec = st->st_mtime_usec = st->st_atime_usec
+      = ts.tv_nsec / 1000;
+  }
+  
+  st->st_blksize = bytes_per_sector;
+  st->st_blocks = (st->st_size - 1) / bytes_per_sector + 1;
+
+ out:
+  /* Common exit: release the directory entry lock, the mapping we
+     created (if any) and the directory node we looked up (if any).  */
+  rwlock_reader_unlock(&np->dn->dirent_lock);
+
+  if (our_buf && buf)
+    munmap ((caddr_t) buf, buflen);
+  if (dp)
+    diskfs_nput (dp);
+  return err;
+}
+
+/* Decide whether the owner of NP may become UID: 0 if it may, an
+   error code otherwise.  Only a UID of zero is accepted.  */
+error_t
+diskfs_validate_owner_change (struct node *np, uid_t uid)
+{
+  /* Allow configurable uid. */
+  return uid == 0 ? 0 : EINVAL;
+}
+
+/* Decide whether the group of NP may become GID: 0 if it may, an
+   error code otherwise.  Only a GID of zero is accepted.  */
+error_t
+diskfs_validate_group_change (struct node *np, gid_t gid)
+{
+  /* Allow configurable gid. */
+  return gid == 0 ? 0 : EINVAL;
+}
+
+/* Return 0 if NP's mode can be changed to MODE; otherwise return an
+   error code.  It must always be possible to clear the mode; diskfs
+   will not ask for permission before doing so.  This implementation
+   accepts every MODE without looking at it.  */
+error_t
+diskfs_validate_mode_change (struct node *np, mode_t mode)
+{
+  /* XXX */
+  return 0;
+}
+
+/* Decide whether the author of NP may become AUTHOR: 0 if it may,
+   an error code otherwise.  The author has to equal the owner
+   recorded in NP's stat information.  */
+error_t
+diskfs_validate_author_change (struct node *np, uid_t author)
+{
+  if (author != np->dn_stat.st_uid)
+    return EINVAL;
+
+  return 0;
+}
+
+/* The user may define this function.  Decide whether the flags of NP
+   may become FLAGS: 0 if they may, an error code otherwise.  Clearing
+   all flags is always allowed; setting any bit outside UF_NODUMP,
+   UF_IMMUTABLE and UF_APPEND is refused.  */
+error_t
+diskfs_validate_flags_change (struct node *np, int flags)
+{
+  const int allowed = UF_NODUMP | UF_IMMUTABLE | UF_APPEND;
+
+  return (flags & ~allowed) ? EINVAL : 0;
+}
+
+/* Writes everything from NP's inode to the disk image.
+
+   If NP's stat information is dirty, the start cluster, the file size
+   and the modification time are stored into NP's directory entry,
+   which is found through the virtual inode key of NP in a writable
+   mapping of the containing directory.  Nothing is written for a node
+   whose key has a zero dir_inode or for the root node.  Failures to
+   reach the directory entry are not reported; the node then simply
+   stays dirty.  */
+void
+write_node (struct node *np)
+{
+  error_t err;
+  struct stat *st = &np->dn_stat;
+  struct dirrect *dr;
+  struct node *dp;
+  struct vi_key vk = vi_key(np->dn->inode);
+  vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE;
+  memory_object_t memobj;
+  vm_address_t buf = 0;
+  vm_size_t buflen;
+
+  /* XXX: If we are called from node-create before direnter was
+     called, DR is zero and we can't update the node. Just return
+     here, and leave it to direnter to call us again when we are
+     ready.
+     If we are called for the root directory node, we can't do anything,
+     as FAT root dirs don't have a directory entry for themselves.
+  */
+  if (vk.dir_inode == 0 || np == diskfs_root_node)
+    return;
+
+  assert (!np->dn_set_ctime && !np->dn_set_atime && !np->dn_set_mtime);
+  if (np->dn_stat_dirty)
+    {
+      assert (!diskfs_readonly);
+
+      err = diskfs_cached_lookup (vk.dir_inode, &dp);
+      if (err)
+	return;
+
+      /* Map in the directory contents. */
+      memobj = diskfs_get_filemap (dp, prot);
+
+      if (memobj == MACH_PORT_NULL)
+	{
+	  /* Give back the locked reference from the lookup above.  */
+	  diskfs_nput (dp);
+	  return;
+	}
+
+      buflen = round_page (dp->dn_stat.st_size);
+      err = vm_map (mach_task_self (),
+		    &buf, buflen, 0, 1, memobj, 0, 0, prot, prot, 0);
+      mach_port_deallocate (mach_task_self (), memobj);
+      if (err)
+	{
+	  /* Without a mapping there is no directory entry to
+	     update.  */
+	  diskfs_nput (dp);
+	  return;
+	}
+
+      dr = (struct dirrect *) (buf + vk.dir_offset);
+
+      rwlock_writer_lock(&np->dn->dirent_lock);
+      write_word (dr->first_cluster_low, np->dn->start_cluster & 0xffff);
+      write_word (dr->first_cluster_high, np->dn->start_cluster >> 16);
+
+      write_dword (dr->file_size, st->st_size);
+
+      /* Write time. */
+      fat_from_epoch ((unsigned char *) &dr->write_date,
+		      (unsigned char *) &dr->write_time, &st->st_mtime);
+
+      rwlock_writer_unlock(&np->dn->dirent_lock);
+      np->dn_stat_dirty = 0;
+
+      munmap ((caddr_t) buf, buflen);
+      diskfs_nput (dp);
+    }
+}
+
+/* Reload all data specific to NODE from disk, without writing anything.
+   Always called with DISKFS_READONLY true.
+
+   The cached cluster chain of NODE is thrown away, its pager is
+   flushed and the directory entry is read again.  Returns the result
+   of read_node.  */
+error_t
+diskfs_node_reload (struct node *node)
+{
+  struct disknode *dn = node->dn;
+  struct cluster_chain *last = dn->first;
+
+  while (last)
+    {
+      struct cluster_chain *next = last->next;
+      free(last);
+      last = next;
+    }
+
+  /* Forget the chain just freed, so that nothing follows the stale
+     pointers; these are the values a freshly created disknode starts
+     out with.  */
+  dn->first = 0;
+  dn->last = 0;
+  dn->length_of_chain = 0;
+  dn->chain_complete = 0;
+
+  flush_node_pager (node);
+
+  return read_node (node, 0);
+}
+
+/* Call FUN on every node currently in the hash table, holding the
+   node's lock for the duration of each call.  As soon as FUN returns
+   non-zero no further nodes are visited and that value is returned;
+   otherwise the result is 0.  */
+error_t
+diskfs_node_iterate (error_t (*fun)(struct node *))
+{
+  error_t err = 0;
+  int bucket, i, count = 0;
+  struct node *node, **snapshot;
+
+  spin_lock (&diskfs_node_refcnt_lock);
+
+  /* The hash table is guarded by diskfs_node_refcnt_lock, which we
+     cannot keep while taking the individual node locks.  So take a
+     private snapshot of the table first, with a reference on every
+     node in it, and work from the snapshot afterwards.  */
+
+  /* First pass: size the snapshot.  */
+  for (bucket = 0; bucket < INOHSZ; bucket++)
+    for (node = nodehash[bucket]; node; node = node->dn->hnext)
+      count++;
+
+  snapshot = alloca (count * sizeof (struct node *));
+
+  /* Second pass: fill it in.  */
+  i = 0;
+  for (bucket = 0; bucket < INOHSZ; bucket++)
+    for (node = nodehash[bucket]; node; node = node->dn->hnext)
+      {
+        node->references++;
+        snapshot[i++] = node;
+      }
+
+  spin_unlock (&diskfs_node_refcnt_lock);
+
+  /* Every snapshot entry has to be released again, even after FUN
+     has failed.  */
+  for (i = 0; i < count; i++)
+    {
+      node = snapshot[i];
+      if (!err)
+        {
+          mutex_lock (&node->lock);
+          err = (*fun)(node);
+          mutex_unlock (&node->lock);
+        }
+      diskfs_nrele (node);
+    }
+
+  return err;
+}
+
+/* Write all active disknodes to their directory entries: for every
+   node in the hash table, bring its time stamps up to date and call
+   write_node on it.  */
+void
+write_all_disknodes ()
+{
+  /* Callback for diskfs_node_iterate; always returns 0 so that the
+     iteration covers every node.  */
+  error_t write_one_disknode (struct node *node)
+    {
+      diskfs_set_node_times (node);
+
+      /* Update the directory entry.  */
+      write_node (node);
+
+      return 0;
+    }
+  
+  diskfs_node_iterate (write_one_disknode);
+}
+
+
+/* Set the owner and group in the stat information of every node in
+   the hash table to the current FS_UID and FS_GID.  */
+void
+refresh_node_stats ()
+{
+  /* Callback for diskfs_node_iterate; always returns 0 so that the
+     iteration covers every node.  */
+  error_t refresh_one_node_stat (struct node *node)
+    {
+      node->dn_stat.st_uid = fs_uid;
+      node->dn_stat.st_gid = fs_gid;
+      return 0;
+    }
+
+  diskfs_node_iterate (refresh_one_node_stat);
+}
+
+
+/* Sync the info in NP->dn_stat and any associated format-specific
+   information to disk.  If WAIT is true, then return only after the
+   physical media has been completely updated.  This implementation
+   calls write_node and does not look at WAIT.  */
+void
+diskfs_write_disknode (struct node *np, int wait)
+{
+  write_node (np);
+}
+
+/* Fill in *ST to describe the current state of the filesystem.
+   Always returns 0.  */
+error_t
+diskfs_set_statfs (struct statfs *st)
+{
+  /* Identification.  */
+  st->f_type = FSTYPE_MSLOSS;
+  st->f_fsid = getpid ();
+
+  /* Sizes: blocks are counted in sectors, fragments are clusters.  */
+  st->f_bsize = bytes_per_sector;
+  st->f_frsize = bytes_per_cluster;
+  st->f_blocks = total_sectors;
+  st->f_bfree = fat_get_freespace () * sectors_per_cluster;
+  st->f_bavail = st->f_bfree;
+
+  /* There is no easy way to determine the number of (free) files on a
+     FAT filesystem.  */
+  st->f_files = 0;
+  st->f_ffree = 0;
+  st->f_favail = st->f_ffree;
+
+  st->f_namelen = 0;
+
+  return 0;
+}
+
+/* Setting a translator record is not supported: after asserting that
+   the filesystem is writable, this always returns EOPNOTSUPP.  */
+error_t
+diskfs_set_translator (struct node *node,
+		       const char *name, u_int namelen,
+		       struct protid *cred)
+{
+  assert (!diskfs_readonly);
+  return EOPNOTSUPP;
+}
+
+/* Never expected to be called: the body is a failing assertion and
+   neither *NAMEP nor *NAMELEN is ever set.
+   NOTE(review): with assertions compiled out, control would fall off
+   the end of this non-void function.  */
+error_t
+diskfs_get_translator (struct node *node, char **namep, u_int *namelen)
+{
+  assert(0);
+}
+
+/* Currently does nothing; see the XXX note in the body for what it
+   is meant to do.  */
+void
+diskfs_shutdown_soft_ports ()
+{
+    /* Should initiate termination of internally held pager ports
+     (the only things that should be soft) XXX */
+}
+
+/* The user must define this function.  Truncate locked node NODE to be
+   LENGTH bytes long.  (If NODE is already less than or equal to LENGTH
+   bytes long, do nothing.)  If this is a symlink (and
+   diskfs_shortcut_symlink is set) then this should clear the symlink,
+   even if diskfs_create_symlink_hook stores the link target elsewhere.
+
+   Returns 0 when nothing had to be done, otherwise the result of
+   diskfs_catch_exception.  */
+error_t
+diskfs_truncate (struct node *node, loff_t length)
+{
+  error_t err;
+  loff_t offset;
+
+  diskfs_check_readonly ();
+  assert (!diskfs_readonly);
+
+  if (length >= node->dn_stat.st_size)
+    return 0;
+
+  /* If the file is not being truncated to a cluster boundary, the
+     contents of the partial cluster following the end of the file
+     must be zeroed in case it ever becomes accessible again because
+     of subsequent file growth.  */
+  offset = length & (bytes_per_cluster - 1);
+  if (offset > 0)
+    {
+      /* Write zeroes from LENGTH up to the end of its cluster.  The
+	 result of the write is not checked.  */
+      diskfs_node_rdwr (node, (void *)zerocluster, length, bytes_per_cluster - offset,
+                        1, 0, 0);
+      diskfs_file_update (node, 1);
+    }
+
+  rwlock_writer_lock (&node->dn->alloc_lock);
+
+  /* Update the size on disk; if we crash, we'll lose.  */
+  node->dn_stat.st_size = length;
+  node->dn_set_mtime = 1;
+  node->dn_set_ctime = 1;
+  diskfs_node_update (node, 1);
+
+  /* Cut the cluster chain down to the number of clusters needed for
+     LENGTH bytes and record the new allocation size.  */
+  err = diskfs_catch_exception ();
+  if (!err)
+    {
+      fat_truncate_node(node, round_cluster(length) >> log2_bytes_per_cluster);
+      node->allocsize = round_cluster(length);
+    }
+  diskfs_end_catch_exception ();
+
+  /* Request another time stamp update and mark the stat information
+     dirty again.  */
+  node->dn_set_mtime = 1;
+  node->dn_set_ctime = 1;
+  node->dn_stat_dirty = 1;
+
+  rwlock_writer_unlock (&node->dn->alloc_lock);
+  
+  return err;
+}
+
+/* Reporting storage information is not implemented: always returns
+   EOPNOTSUPP and leaves all output parameters untouched.  */
+error_t
+diskfs_S_file_get_storage_info (struct protid *cred,
+				mach_port_t **ports,
+				mach_msg_type_name_t *ports_type,
+				mach_msg_type_number_t *num_ports,
+				int **ints, mach_msg_type_number_t *num_ints,
+				loff_t **offsets,
+				mach_msg_type_number_t *num_offsets,
+				char **data, mach_msg_type_number_t *data_len)
+{
+  /* XXX */
+  return EOPNOTSUPP;
+}
+
+/* Free node NP; the on disk copy has already been synced with
+   diskfs_node_update (where NP->dn_stat.st_mode was 0).  Its
+   mode used to be OLD_MODE.  This implementation releases the
+   virtual inode of NP and does not look at OLD_MODE.  */
+void
+diskfs_free_node (struct node *np, mode_t old_mode)
+{
+  assert (!diskfs_readonly);
+
+  vi_free(np->dn->inode);
+}
+
+/* The user must define this function.  Allocate a new node to be of
+   mode MODE in locked directory DIR (don't actually set the mode or
+   modify the dir, that will be done by the caller).  Set *NODE to be
+   the newly allocated node.
+
+   This implementation uses neither DIR nor MODE: it allocates a
+   virtual inode under a placeholder key and looks up the node for
+   it.  Returns 0 on success or the error from vi_new or
+   diskfs_cached_lookup.  */
+error_t
+diskfs_alloc_node (struct node *dir, mode_t mode, struct node **node)
+{
+  error_t err;
+  ino_t inum;
+  inode_t inode;
+  struct node *np;
+  
+  assert (!diskfs_readonly);
+
+  err = vi_new((struct vi_key) {0,1} /* XXX not allocated yet */, &inum, &inode);
+  if (err)
+    return err;
+
+  /* NOTE(review): if this lookup fails, the virtual inode allocated
+     just above is not released.  */
+  err = diskfs_cached_lookup (inum, &np);
+  if (err)
+    return err;
+
+  *node = np;
+  return 0;
+}
diff --git a/fatfs/main.c b/fatfs/main.c
new file mode 100644
index 00000000..800e58b4
--- /dev/null
+++ b/fatfs/main.c
@@ -0,0 +1,265 @@
+/* main.c - FAT filesystem.
+   Copyright (C) 1997, 1998, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <string.h>
+#include <error.h>
+#include <argp.h>
+#include <argz.h>
+#include <limits.h>
+
+#include <version.h>
+#include "fatfs.h"
+
+struct node *diskfs_root_node;
+
+struct store *store = 0;
+struct store_parsed *store_parsed = 0;
+char *diskfs_disk_name = 0;
+
+char *diskfs_server_name = "fatfs";
+char *diskfs_server_version = HURD_VERSION;
+char *diskfs_extra_version = "GNU Hurd";
+int diskfs_synchronous = 0;
+
+int diskfs_link_max = 1;
+int diskfs_name_max = FAT_NAME_MAX;
+int diskfs_maxsymlinks = 8;     /* XXX */
+
+/* This filesystem is not capable of writing yet.  */
+int diskfs_readonly = 1, diskfs_hard_readonly = 1;
+
+/* Handy source of zeroes.  */
+vm_address_t zerocluster;
+
+struct dirrect dr_root_node;
+
+/* The UID and GID for all files in the filesystem.  */
+uid_t default_fs_uid;
+gid_t default_fs_gid;
+uid_t fs_uid;
+gid_t fs_gid;
+
+/* fatfs specific options.  */
+static const struct argp_option options[] =
+  {
+    { "uid", 'U', "uid", 0, "Default uid for files" },
+    { "gid", 'G', "gid", 0, "Default gid for files" },
+    { 0 }
+  };
+
+/* argp parser for the fatfs specific options.  'U' and 'G' store the
+   numeric value of ARG (if there is one) in FS_UID and FS_GID
+   respectively and then push the ids out to all nodes.  Returns
+   ARGP_ERR_UNKNOWN for keys that are not handled here.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  if (key == 'U')
+    {
+      if (arg)
+	fs_uid = atoi (arg);
+      refresh_node_stats ();
+    }
+  else if (key == 'G')
+    {
+      if (arg)
+	fs_gid = atoi (arg);
+      refresh_node_stats ();
+    }
+  else if (key == ARGP_KEY_INIT)
+    {
+      /* Hand our input on to the first child parser.  */
+      state->child_inputs[0] = state->input;
+    }
+  else if (key != ARGP_KEY_SUCCESS)
+    return ARGP_ERR_UNKNOWN;
+
+  return 0;
+}
+
+/* Add our startup arguments to the standard diskfs set.  */
+static const struct argp_child startup_children[] =
+ { { &diskfs_store_startup_argp }, { 0 } };
+static struct argp startup_argp =
+  { options, parse_opt, 0, 0, startup_children };
+
+/* Similarly at runtime.  */
+static const struct argp_child runtime_children[] =
+ { { &diskfs_std_runtime_argp }, { 0 } };
+static struct argp runtime_argp =
+  { options, parse_opt, 0, 0, runtime_children };
+
+struct argp *diskfs_runtime_argp = (struct argp *) &runtime_argp;
+
+
+/* Override the standard diskfs routine so we can add our own
+   output.  Appends to the argz vector *ARGZ / *ARGZ_LEN, in this
+   order: the standard diskfs options, "--uid=N" and "--gid=N" for
+   whichever of FS_UID and FS_GID differs from its default, and the
+   arguments describing the store.  Stops at the first failure and
+   returns its error; returns 0 otherwise.  */
+error_t
+diskfs_append_args (char **argz, unsigned *argz_len)
+{
+  error_t err;
+  char buf[100];
+
+  /* Get the standard things.  */
+  err = diskfs_append_std_options (argz, argz_len);
+
+  /* The ids are only mentioned when they were changed from the
+     defaults recorded at startup.  */
+  if (!err && fs_uid != default_fs_uid)
+    {
+      snprintf (buf, sizeof buf, "--uid=%d", fs_uid);
+      err = argz_add (argz, argz_len, buf);
+    }
+
+  if (!err && fs_gid != default_fs_gid)
+    {
+      snprintf (buf, sizeof buf, "--gid=%d", fs_gid);
+      err = argz_add (argz, argz_len, buf);
+    }
+
+  if (! err)
+    err = store_parsed_append_args (store_parsed, argz, argz_len);
+
+  return err;
+}
+
+
+/* Fetch the root node.
+
+   The root directory has no directory entry of its own, so a
+   synthetic one is built in DR_ROOT_NODE: it carries the directory
+   attribute, on FAT32 the start cluster from the superblock, and a
+   file size computed according to the FAT type.  A virtual inode is
+   then allocated for the root under the key {0, 1} and the resulting
+   node is stored, unlocked, in DISKFS_ROOT_NODE.  Failures are
+   fatal.  */
+static void
+fetch_root ()
+{
+  error_t err;
+  ino_t inum;
+  inode_t inode;
+
+  memset (&dr_root_node, 0, sizeof(struct dirrect));
+
+  /* Fill root directory entry.  XXX Should partially be in fat.c  */
+  dr_root_node.attribute = FAT_DIR_ATTR_DIR;
+  if (fat_type == FAT32)
+    {
+      /* FAT32: the root directory is a cluster chain whose start
+	 cluster is recorded in the superblock.  On FAT12/16 there is
+	 no such thing as a start cluster, because the whole root dir
+	 is in a special region after the FAT; the start cluster of
+	 the root node is then left at zero.  */
+      dr_root_node.first_cluster_high[1]
+	= sblock->compat.fat32.root_cluster[3];
+      dr_root_node.first_cluster_high[0]
+	= sblock->compat.fat32.root_cluster[2];
+      dr_root_node.first_cluster_low[1] = sblock->compat.fat32.root_cluster[1];
+      dr_root_node.first_cluster_low[0] = sblock->compat.fat32.root_cluster[0];
+    }
+
+  /* Determine size of the directory (different for fat12/16 vs 32).  */
+  switch (fat_type)
+    {
+    case FAT12:
+    case FAT16:
+      write_dword(dr_root_node.file_size, nr_of_root_dir_sectors
+		  << log2_bytes_per_sector);
+      break;
+
+    case FAT32:
+      {
+	/* Walk the cluster chain of the root directory and calculate
+	   file_size based on its length.  */
+	cluster_t rootdir;
+	int cs = 0;
+
+	/* ROOT_CLUSTER is a four byte field; all of it is needed, not
+	   just its first byte.  */
+	rootdir = (cluster_t) read_dword (sblock->compat.fat32.root_cluster);
+	while (rootdir != FAT_EOC)
+	  {
+	    err = fat_get_next_cluster (rootdir, &rootdir);
+	    /* Give up rather than spin on a chain we cannot follow.  */
+	    if (err)
+	      error (1, err, "cannot follow the root directory cluster chain");
+	    cs++;
+	  }
+	write_dword (dr_root_node.file_size, cs << log2_bytes_per_cluster);
+      }
+      break;
+
+    default:
+      assert(!"don't know how to set size of root dir");
+    };
+
+  /* The magic vi_key {0, 1} for the root directory is distinguished
+     from the vi_zero_key (in the dir_offset value) as well as all
+     normal virtual inode keys (in the dir_inode value).  Enter the
+     disknode into the inode table.  */
+  err = vi_new ((struct vi_key) {0, 1}, &inum, &inode);
+  assert_perror (err);
+
+  /* Allocate a node for the root directory disknode in
+     diskfs_root_node.  */
+  if (!err)
+    err = diskfs_cached_lookup (inum, &diskfs_root_node);
+
+  assert_perror (err);
+
+  mutex_unlock (&diskfs_root_node->lock);
+}
+
+
+/* Entry point of the translator: record the default ids, start up
+   diskfs and open the store, read the superblock, create the FAT
+   pager, set up the zero cluster, fetch the root node and finally
+   start serving.  */
+int
+main (int argc, char **argv)
+{
+  mach_port_t bootstrap;
+
+  default_fs_uid = getuid ();
+  default_fs_gid = getgid ();
+  fs_uid = default_fs_uid;
+  fs_gid = default_fs_gid;
+
+  /* Initialize the diskfs library, parse arguments, and open the
+     store.  This starts the first diskfs thread for us.  */
+  store = diskfs_init_main (&startup_argp, argc, argv, &store_parsed,
+			    &bootstrap);
+
+  fat_read_sblock ();
+
+  create_fat_pager ();
+
+  zerocluster = (vm_address_t) mmap (0, bytes_per_cluster, PROT_READ|PROT_WRITE,
+				     MAP_ANON, 0, 0);
+  /* A failed mapping must not be handed out as a source of zeroes.  */
+  if (zerocluster == (vm_address_t) MAP_FAILED)
+    error (2, errno, "cannot allocate the zero cluster");
+
+  fetch_root ();
+
+  diskfs_startup_diskfs (bootstrap, 0);
+
+  cthread_exit (0);
+
+  return 0;
+}
+
+
+/* Nothing to do for read-only medium.  Always returns 0.  */
+error_t
+diskfs_reload_global_state ()
+{
+  return 0;
+}
+
+
+/* Does nothing: WAIT and CLEAN are not looked at and the result is
+   always 0.  */
+error_t
+diskfs_set_hypermetadata (int wait, int clean)
+{
+  return 0;
+}
+
+
+/* Not expected to be called; aborts the program if it is.  READONLY
+   is not looked at.  */
+void
+diskfs_readonly_changed (int readonly)
+{
+  /* We should never get here because we set diskfs_hard_readonly above.  */
+  abort ();
+}
+
diff --git a/fatfs/pager.c b/fatfs/pager.c
new file mode 100644
index 00000000..606dc4d8
--- /dev/null
+++ b/fatfs/pager.c
@@ -0,0 +1,1019 @@
+/* pager.c - Pager for fatfs.
+   Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc.
+   Written by Thomas Bushnell, n/BSG and Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#include <string.h>
+#include <hurd/store.h>
+#include "fatfs.h"
+
+/* A ports bucket to hold pager ports.  */
+struct port_bucket *pager_bucket;
+
+/* Mapped image of the FAT.  */
+void *fat_image;
+/* Held around every access to the PAGER field of a disknode.  */
+spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
+
+#ifdef DONT_CACHE_MEMORY_OBJECTS
+#define MAY_CACHE 0
+#else
+#define MAY_CACHE 1
+#endif
+/* Statistics hook; it currently expands to a constant and counts nothing.  */
+#define STAT_INC(field) /* nop */0
+/* Upper bound on the number of buffers kept in the free list below.  */
+#define MAX_FREE_PAGE_BUFS 32
+/* Free list of page buffers, linked through the first word of each.  */
+static spin_lock_t free_page_bufs_lock = SPIN_LOCK_INITIALIZER;
+static void *free_page_bufs = 0;
+static int num_free_page_bufs = 0;
+
+/* Returns a single page-sized, page-aligned buffer, or 0 on failure.  */
+static void *
+get_page_buf ()
+{
+  void *page;
+
+  /* Fast path: pop the head of the free list.  */
+  spin_lock (&free_page_bufs_lock);
+  page = free_page_bufs;
+  if (page != 0)
+    {
+      free_page_bufs = *(void **) page;
+      num_free_page_bufs--;
+    }
+  spin_unlock (&free_page_bufs_lock);
+
+  if (page != 0)
+    return page;
+
+  /* The pool was empty; map a fresh anonymous page instead.  */
+  page = mmap (0, vm_page_size, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+  if (page == (void *) -1)
+    return 0;
+  return page;
+}
+
+/* Gives back a buffer obtained from get_page_buf.  */
+static void
+free_page_buf (void *buf)
+{
+  int pooled = 0;
+  spin_lock (&free_page_bufs_lock);
+  if (num_free_page_bufs < MAX_FREE_PAGE_BUFS)
+    {
+      /* Push BUF on the free list; its first word holds the link.  */
+      *(void **) buf = free_page_bufs;
+      free_page_bufs = buf;
+      num_free_page_bufs++;
+      pooled = 1;
+    }
+  spin_unlock (&free_page_bufs_lock);
+  if (!pooled)
+    munmap (buf, vm_page_size);	/* The pool is full.  */
+}
+
+/* Find the location on disk of page OFFSET in NODE.  Return the disk
+   cluster in CLUSTER.  If *LOCK is 0, then a reader lock is acquired
+   on NODE's ALLOC_LOCK before doing anything, stored in *LOCK, and left
+   locked after return -- even if an error is returned.  0 on success or an
+   error code otherwise is returned.  */
+static error_t
+find_cluster (struct node *node, vm_offset_t offset,
+	      cluster_t *cluster, struct rwlock **lock)
+{
+  error_t err;
+
+  if (!*lock)
+    {
+      *lock = &node->dn->alloc_lock;
+      rwlock_reader_lock (*lock);
+    }
+  /* The whole cluster starting at OFFSET must lie within allocsize.  */
+  if (offset + bytes_per_cluster > node->allocsize)
+    return EIO;
+
+  err = fat_getcluster (node, offset >> log2_bytes_per_cluster, 0, cluster);
+
+  return err;
+}
+
+/* Read one page for the root dir pager at offset PAGE, into *BUF.  This
+   may need to select several filesystem sectors to satisfy one page.
+   Assumes that fat_type is FAT12 or FAT16, and that vm_page_size is a
+   power of two multiple of bytes_per_sector (which happens to be true).
+   *WRITELOCK is cleared.  Returns 0, or EIO or the store's error.  */
+static error_t
+root_dir_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+{
+  error_t err;
+  daddr_t addr;
+  int overrun = 0;
+  size_t read = 0;
+
+  *writelock = 0;
+
+  if (page >= diskfs_root_node->allocsize)
+    return EIO;
+
+  rwlock_reader_lock (&diskfs_root_node->dn->alloc_lock);
+
+  addr = first_root_dir_byte + page;
+  if (page + vm_page_size > diskfs_root_node->allocsize)
+    overrun = page + vm_page_size - diskfs_root_node->allocsize;
+
+  err = store_read (store, addr >> store->log2_block_size,
+		    vm_page_size, (void **) buf, &read);
+  if (!err && read != vm_page_size)
+    err = EIO;
+
+  rwlock_reader_unlock (&diskfs_root_node->dn->alloc_lock);
+
+  /* Zero the part of the page beyond the end of the directory, but only
+     after a successful read: on failure *BUF need not be valid.  */
+  if (!err && overrun)
+    bzero ((void *) *buf + vm_page_size - overrun, overrun);
+
+  return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into *BUF.  This
+   may need to select only a part of a filesystem block to satisfy one page.
+   Assumes that bytes_per_cluster is a power of two multiple of vm_page_size.
+   *WRITELOCK is cleared.  Returns 0, or EIO or the error of a callee.  */
+static error_t
+file_pager_read_small_page (struct node *node, vm_offset_t page,
+			    void **buf, int *writelock)
+{
+  error_t err;
+  struct rwlock *lock = NULL;
+  cluster_t cluster;
+  size_t read = 0;
+
+  *writelock = 0;
+
+  if (page >= node->allocsize)
+    return EIO;
+
+  err = find_cluster (node, page, &cluster, &lock);
+
+  if (!err)
+    {
+      err = store_read (store,
+			(fat_first_cluster_byte(cluster) +
+			 (page % bytes_per_cluster)) >> store->log2_block_size,
+			vm_page_size, (void **) buf, &read);
+      /* A short read is an I/O error, but do not let it mask the
+	 more specific error store_read itself may have returned.  */
+      if (!err && read != vm_page_size)
+	err = EIO;
+    }
+
+  if (lock)
+    rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into *BUF.  This
+   may need to read several filesystem blocks to satisfy one page, and tries
+   to consolidate the i/o if possible.  *WRITELOCK is cleared.
+   Assumes that vm_page_size is a power of two multiple of bytes_per_cluster.
+*/
+static error_t
+file_pager_read_huge_page (struct node *node, vm_offset_t page,
+			   void **buf, int *writelock)
+{
+  error_t err;
+  int offs = 0;
+  struct rwlock *lock = NULL;
+  int left = vm_page_size;
+  cluster_t pending_clusters = 0;
+  int num_pending_clusters = 0;
+
+  /* Read the NUM_PENDING_CLUSTERS clusters starting at PENDING_CLUSTERS, into
+     the buffer pointed to by BUF (allocating it if necessary) at offset OFFS.
+     OFFS is adjusted by the amount read, and NUM_PENDING_CLUSTERS is zeroed.
+     Any read error is returned; a short read is reported as EIO.  */
+  error_t do_pending_reads ()
+    {
+      if (num_pending_clusters > 0)
+        {
+          size_t dev_block = fat_first_cluster_byte(pending_clusters) >> store->log2_block_size;
+          size_t amount = num_pending_clusters << log2_bytes_per_cluster;
+	  /* The buffer we try to read into; on the first read, we pass in a
+	     size of zero, so that the read is guaranteed to allocate a new
+	     buffer, otherwise, we try to read directly into the tail of the
+	     buffer we've already got.  */
+	  void *new_buf = *buf + offs;
+	  size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+
+          STAT_INC (file_pagein_reads);
+	  
+	  err = store_read (store, dev_block, amount, &new_buf, &new_len);
+	  if (err)
+	    return err;
+	  else if (amount != new_len)
+	    return EIO;
+
+	  if (new_buf != *buf + offs)
+	    {
+	      /* The read went into a different buffer than the one we
+		 passed. */
+	      if (offs == 0)
+		/* First read, make the returned page be our buffer.  */
+		*buf = new_buf;
+	      else
+		/* We've already got some buffer, so copy into it.  */
+		{
+		  memcpy (*buf + offs, new_buf, new_len);
+                  free_page_buf (new_buf); /* Return NEW_BUF to our pool.  */
+                  STAT_INC (file_pagein_freed_bufs);
+		}
+	    }
+	  
+	  offs += new_len;
+	  num_pending_clusters = 0;
+	}
+
+      return 0;
+    }
+
+  STAT_INC (file_pageins);
+ 
+  *writelock = 0;
+
+  if (page >= node->allocsize)
+    {
+      err = EIO;
+      left = 0;
+    }
+  else if (page + left > node->allocsize)
+      left = node->allocsize - page;
+  /* LEFT is now the number of bytes of the page that lie within allocsize.  */
+  while (left > 0)
+    {
+      cluster_t cluster;
+
+      err = find_cluster (node, page, &cluster, &lock);
+      if (err)
+        break;
+
+      if (cluster != pending_clusters + num_pending_clusters)
+        {
+          err = do_pending_reads ();
+          if (err)
+            break;
+          pending_clusters = cluster;
+        }
+
+      num_pending_clusters++;
+      
+      page += bytes_per_cluster;
+      left -= bytes_per_cluster;
+    }
+
+  if (!err && num_pending_clusters > 0)
+    err = do_pending_reads();
+
+  if (lock)
+    rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+struct pending_clusters	/* A run of consecutive disk clusters to write.  */
+  {
+    /* The cluster number of the first of the clusters.  */
+    cluster_t cluster;
+    /* How many clusters we have; 0 means the run is empty.  */
+    loff_t num;
+    /* A (page-aligned) buffer pointing to the data we're dealing with.  */
+    void *buf;
+    /* Byte offset into BUF of the data for the first pending cluster.  */
+    int offs;
+};
+
+/* Write any pending clusters in PC; 0 or an error code is returned.  */
+static error_t
+pending_clusters_write (struct pending_clusters *pc)
+{
+  if (pc->num > 0)
+    {
+      error_t err;
+      size_t dev_block = fat_first_cluster_byte(pc->cluster) >> store->log2_block_size;
+      size_t length = pc->num << log2_bytes_per_cluster, amount;
+
+      if (pc->offs > 0)
+        /* Put what we're going to write into a page-aligned buffer.  */
+        {
+          void *page_buf = get_page_buf ();
+          if (page_buf == 0)	/* Out of memory; nothing was written.  */
+            return ENOMEM;
+          memcpy (page_buf, pc->buf + pc->offs, length);
+          err = store_write (store, dev_block, page_buf, length, &amount);
+          free_page_buf (page_buf);
+        }
+      else
+        err = store_write (store, dev_block, pc->buf, length, &amount);
+      if (err)
+        return err;
+      else if (amount != length)
+        return EIO;
+      pc->offs += length;	/* The next run starts right after this data.  */
+      pc->num = 0;
+    }
+  return 0;
+}
+
+static void
+pending_clusters_init (struct pending_clusters *pc, void *buf)
+{
+  /* An empty run; its data will be taken from the start of BUF.  */
+  pc->buf = buf;
+  pc->cluster = 0;
+  pc->num = pc->offs = 0;
+}
+
+/* Add the disk cluster CLUSTER to the run of clusters pending in PC.  A
+   CLUSTER that does not extend the run makes the run be written first.  */
+static error_t
+pending_clusters_add (struct pending_clusters *pc, cluster_t cluster)
+{
+  int extends_run = (cluster == pc->cluster + pc->num);
+  error_t err = extends_run ? 0 : pending_clusters_write (pc);
+
+  if (err)
+    return err;
+  if (!extends_run)
+    pc->cluster = cluster;
+  pc->num += 1;
+  return 0;
+}
+
+/* Write one page for the pager backing NODE, at offset OFFSET, from BUF.
+   This may need to write several filesystem blocks to satisfy one page,
+   and tries to consolidate the i/o if possible.  Returns 0 or an error.
+   Assumes that vm_page_size is a power of two multiple of bytes_per_cluster.
+*/
+static error_t
+file_pager_write_huge_page (struct node *node, vm_offset_t offset, void *buf)
+{
+  error_t err = 0;
+  struct pending_clusters pc;
+  struct rwlock *lock = &node->dn->alloc_lock;
+  cluster_t cluster;
+  int left = vm_page_size;
+
+  pending_clusters_init (&pc, buf);
+
+  /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+     at least for the cases we care about: pager_unlock_page,
+     diskfs_grow and diskfs_truncate.  */
+  rwlock_reader_lock (&node->dn->alloc_lock);
+
+  if (offset >= node->allocsize)
+    left = 0;
+  else if (offset + left > node->allocsize)
+    left = node->allocsize - offset;
+
+  STAT_INC (file_pageouts);
+
+  /* LOCK is preset, so find_cluster does not take the lock again.  */
+  while (!err && left > 0)
+    {
+      err = find_cluster (node, offset, &cluster, &lock);
+      if (!err)
+        err = pending_clusters_add (&pc, cluster);
+      offset += bytes_per_cluster;
+      left -= bytes_per_cluster;
+    }
+
+  if (!err)
+    err = pending_clusters_write (&pc);
+
+  rwlock_reader_unlock (&node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Write one page for the root dir pager, at offset OFFSET, from BUF.  The
+   data goes out in a single store request, which is cut short at the
+   end of the root directory.
+   Assumes that fat_type is FAT12 or FAT16 and that vm_page_size is a
+   power of two multiple of bytes_per_sector.
+*/
+static error_t
+root_dir_pager_write_page (vm_offset_t offset, void *buf)
+{
+  error_t err;
+  daddr_t start;
+  size_t count = vm_page_size;
+  size_t written = 0;
+
+  /* A page lying entirely beyond the directory is dropped.  */
+  if (offset >= diskfs_root_node->allocsize)
+    return 0;
+
+  /* While the reader lock is held, allocsize cannot change under us,
+     at least not through pager_unlock_page, diskfs_grow or
+     diskfs_truncate.  */
+  rwlock_reader_lock (&diskfs_root_node->dn->alloc_lock);
+
+  /* Never write past the end of the directory.  */
+  if (offset + vm_page_size > diskfs_root_node->allocsize)
+    count = diskfs_root_node->allocsize - offset;
+
+  start = first_root_dir_byte + offset;
+
+  err = store_write (store, start >> store->log2_block_size, (void **) buf,
+		     count, &written);
+  if (err == 0 && written != count)
+    err = EIO;
+
+  rwlock_reader_unlock (&diskfs_root_node->dn->alloc_lock);
+
+  return err;
+}
+
+/* Write one page for the pager backing NODE, at offset OFFSET, from BUF.
+   The page lies within a single cluster, so one store request suffices.
+   Returns 0 on success (also if OFFSET is beyond the allocated size).
+   Assumes that bytes_per_cluster is a power of two multiple of vm_page_size.
+*/
+static error_t
+file_pager_write_small_page (struct node *node, vm_offset_t offset, void *buf)
+{
+  error_t err;
+  /* Preset, so that find_cluster does not take the lock a second time;
+     a second reader lock would never be released again.  */
+  struct rwlock *lock = &node->dn->alloc_lock;
+  cluster_t cluster;
+  size_t write = 0;
+
+  if (offset >= node->allocsize)
+    return 0;
+
+  /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+     at least for the cases we care about: pager_unlock_page,
+     diskfs_grow and diskfs_truncate.  */
+  rwlock_reader_lock (lock);
+
+  err = find_cluster (node, offset, &cluster, &lock);
+  if (!err)
+    {
+      err = store_write (store,
+			 (fat_first_cluster_byte(cluster) +
+			  (offset % bytes_per_cluster)) >> store->log2_block_size,
+			  (void **) buf, vm_page_size, &write);
+      if (!err && write != vm_page_size)
+	err = EIO;
+    }
+
+  rwlock_reader_unlock (lock);
+
+  return err;
+}
+
+/* Read the page at offset PAGE of the FAT into *BUF; clears *WRITELOCK.  */
+static error_t
+fat_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+{
+  error_t err;
+  size_t length = vm_page_size, read = 0;
+  vm_size_t fat_end = bytes_per_sector * sectors_per_fat;
+
+  *writelock = 0;	/* Set on every path, including the error returns.  */
+  if (page >= fat_end)	/* Otherwise LENGTH below would wrap around.  */
+    return EIO;
+  if (page + vm_page_size > fat_end)
+    length = fat_end - page;
+  page += first_fat_sector * bytes_per_sector;
+  err = store_read (store, page >> store->log2_block_size, length, buf, &read);
+  if (!err && read != length)	/* Do not mask store_read's own error.  */
+    err = EIO;
+  if (!err && length != vm_page_size)	/* Zero the tail beyond the FAT.  */
+    memset ((void *)(*buf + length), 0, vm_page_size - length);
+  return err;
+}
+
+/* Write the page BUF at offset PAGE of the FAT, cut short at its end.  */
+static error_t
+fat_pager_write_page (vm_offset_t page, void *buf)
+{
+  error_t err = 0;
+  size_t length = vm_page_size, amount;
+  vm_size_t fat_end = bytes_per_sector * sectors_per_fat;
+  if (page >= fat_end)	/* Nothing of the FAT here; LENGTH would wrap.  */
+    return 0;
+  if (page + vm_page_size > fat_end)
+    length = fat_end - page;
+  page += first_fat_sector * bytes_per_sector;
+  err = store_write (store, page >> store->log2_block_size,
+		     buf, length, &amount);
+  if (!err && length != amount)
+    err = EIO;
+  return err;
+}
+
+/* Satisfy a pager read request for either the disk pager or file pager
+   PAGER, reading the page at offset PAGE into *BUF.  The reader that is
+   picked sets *WRITELOCK and provides the return value.  */
+error_t
+pager_read_page (struct user_pager_info *pager, vm_offset_t page,
+                 vm_address_t *buf, int *writelock)
+{
+  void **bufp = (void **) buf;
+
+  /* The FAT itself.  */
+  if (pager->type == FAT)
+    return fat_pager_read_page (page, bufp, writelock);
+
+  /* The root directory of FAT12/16 has a reader of its own.  */
+  if (pager->node == diskfs_root_node
+      && (fat_type == FAT12 || fat_type == FAT16))
+    return root_dir_pager_read_page (page, bufp, writelock);
+
+  /* Everything else: pick the reader fitting the cluster size.  */
+  if (bytes_per_cluster < vm_page_size)
+    return file_pager_read_huge_page (pager->node, page,
+				      bufp, writelock);
+
+  return file_pager_read_small_page (pager->node, page, bufp, writelock);
+}
+
+/* Satisfy a pager write request for either the disk pager or file pager
+   PAGER, writing the page BUF to offset PAGE.  The return value is that
+   of the writer that is picked.  */
+error_t
+pager_write_page (struct user_pager_info *pager, vm_offset_t page,
+                  vm_address_t buf)
+{
+  void *data = (void *) buf;
+
+  /* The FAT itself.  */
+  if (pager->type == FAT)
+    return fat_pager_write_page (page, data);
+
+  /* The root directory of FAT12/16 has a writer of its own.  */
+  if (pager->node == diskfs_root_node
+      && (fat_type == FAT12 || fat_type == FAT16))
+    return root_dir_pager_write_page (page, data);
+
+  /* Everything else: pick the writer fitting the cluster size.  */
+  if (bytes_per_cluster < vm_page_size)
+    return file_pager_write_huge_page (pager->node, page, data);
+
+  return file_pager_write_small_page (pager->node, page, data);
+}
+
+/* Make page PAGE writable, at least up to ALLOCSIZE.  Always returns 0.  */
+error_t
+pager_unlock_page (struct user_pager_info *pager,
+		   vm_offset_t page)
+{
+  /* All pages are writeable. The disk pages anyway, and the file
+     pages because blocks are directly allocated in diskfs_grow.  */
+  return 0;
+}
+
+/* Grow the disk allocated to locked node NODE to be at least SIZE
+   bytes, and set NODE->allocsize to the actual allocated size.  (If
+   the allocated size is already SIZE bytes, do nothing.)  On error,
+   allocsize reflects the clusters allocated so far.  CRED identifies
+   the user responsible for the call.  This will only be called for
+   real files, so the root dir node on FAT12/16 needs no special care.  */
+error_t
+diskfs_grow (struct node *node, loff_t size, struct protid *cred)
+{
+  diskfs_check_readonly ();
+  assert (!diskfs_readonly);
+  
+  if (size > node->allocsize)
+    {
+      error_t err = 0;
+      loff_t old_size;
+      volatile loff_t new_size;
+      volatile cluster_t end_cluster;
+      cluster_t new_end_cluster;
+      struct disknode *dn = node->dn;
+
+      rwlock_writer_lock (&dn->alloc_lock);
+
+      old_size = node->allocsize;
+      new_size = ((size + bytes_per_cluster - 1) >> log2_bytes_per_cluster)
+						 << log2_bytes_per_cluster;
+
+      /* The first unallocated clusters after the old and new ends of
+         the file, respectively.  */
+      end_cluster = old_size >> log2_bytes_per_cluster;
+      new_end_cluster = new_size >> log2_bytes_per_cluster;
+
+      if (new_end_cluster > end_cluster)
+        {
+	  err = diskfs_catch_exception ();
+	  while (!err && end_cluster < new_end_cluster)
+	    {
+	      cluster_t disk_cluster;
+	      err = fat_getcluster (node, end_cluster++, 1, &disk_cluster);
+	    }
+	  diskfs_end_catch_exception ();
+	  /* Reflect how much we allocated successfully.  END_CLUSTER is
+	     one past the cluster that failed; convert clusters to bytes.  */
+	  if (err)
+	    new_size = (loff_t) (end_cluster - 1) << log2_bytes_per_cluster;
+	}
+      
+      STAT_INC (file_grows);
+
+      node->allocsize = new_size;
+
+      rwlock_writer_unlock (&dn->alloc_lock);
+
+      return err;
+    }
+  else
+    return 0;
+}
+
+/* This syncs a single file (NODE) to disk.  Wait for all I/O to
+   complete if WAIT is set.  NODE->lock must be held.  */
+void
+diskfs_file_update (struct node *node, int wait)
+{
+  struct pager *pager;
+  /* Hold a port reference on the pager while using it without the lock.  */
+  spin_lock (&node_to_page_lock);
+  pager = node->dn->pager;
+  if (pager)
+    ports_port_ref (pager);
+  spin_unlock (&node_to_page_lock);
+
+  if (pager)
+    {
+      pager_sync (pager, wait);
+      ports_port_deref (pager);
+    }
+
+  diskfs_node_update (node, wait);
+}
+
+/* Invalidate any pager data associated with NODE.  */
+void
+flush_node_pager (struct node *node)
+{
+  struct pager *pager;
+  struct disknode *dn = node->dn;
+  /* Hold a port reference on the pager while using it without the lock.  */
+  spin_lock (&node_to_page_lock);
+  pager = dn->pager;
+  if (pager)
+    ports_port_ref (pager);
+  spin_unlock (&node_to_page_lock);
+
+  if (pager)
+    {
+      pager_flush (pager, 1);
+      ports_port_deref (pager);
+    }
+}
+
+/* Return in *OFFSET and *SIZE the minimum valid address the pager
+   will accept and the size of the object: the FAT for the FAT pager,
+   the allocated size of the node for a file pager.  Returns 0.  */
+inline error_t
+pager_report_extent (struct user_pager_info *pager,
+                     vm_address_t *offset, vm_size_t *size)
+{
+  assert (pager->type == FAT || pager->type == FILE_DATA);
+
+  *offset = 0;
+
+  *size = (pager->type == FAT
+	   ? bytes_per_sector * sectors_per_fat
+	   : pager->node->allocsize);
+
+  return 0;
+}
+
+/* This is called when a pager is being deallocated after all extant
+   send rights have been destroyed.  */
+void
+pager_clear_user_data (struct user_pager_info *upi)
+{
+  if (upi->type == FILE_DATA)
+    {
+      struct pager *pager;
+      /* Forget the pager in the node, but only if it is still this one.  */
+      spin_lock (&node_to_page_lock);
+      pager = upi->node->dn->pager;
+      if (pager && pager_get_upi (pager) == upi)
+	upi->node->dn->pager = 0;
+      spin_unlock (&node_to_page_lock);
+      /* Drop the light reference taken in diskfs_get_filemap.  */
+      diskfs_nrele_light (upi->node);
+    }
+  
+  free (upi);
+}
+
+/* This will be called when the ports library wants to drop weak
+   references.  The pager library creates no weak references itself.
+   If the user doesn't either, then it's OK for this function to do
+   nothing, which is what it does here.  */
+void
+pager_dropweak (struct user_pager_info *p __attribute__ ((unused)))
+{
+}
+
+/* Create the disk pager, which provides the FAT image in FAT_IMAGE.  */
+void
+create_fat_pager (void)
+{
+  struct user_pager_info *info;
+  info = malloc (sizeof *info);
+  info->type = FAT;
+  pager_bucket = ports_create_bucket ();
+  diskfs_start_disk_pager (info, pager_bucket, MAY_CACHE,
+			   bytes_per_sector * sectors_per_fat, &fat_image);
+}
+
+/* Call this to create a FILE_DATA pager and return a send right (or
+   MACH_PORT_NULL if pager_create fails).  NODE must be locked.  */
+mach_port_t
+diskfs_get_filemap (struct node *node, vm_prot_t prot)
+{
+  mach_port_t right;
+  
+  assert (S_ISDIR (node->dn_stat.st_mode)
+	  || S_ISREG (node->dn_stat.st_mode)
+	  || (S_ISLNK (node->dn_stat.st_mode)));
+  
+  spin_lock (&node_to_page_lock);
+  do
+    {
+      struct pager *pager = node->dn->pager;
+      if (pager)
+	{
+          /* Because PAGER is not a real reference, this might be
+             nearly deallocated.  If that's so, then the port right
+             will be null.  In that case, clear here and loop.  The
+             deallocation will complete separately. */
+          right = pager_get_port (pager);
+          if (right == MACH_PORT_NULL)
+            node->dn->pager = 0;
+          else
+            pager_get_upi (pager)->max_prot |= prot;
+        }
+      else
+        {
+          struct user_pager_info *upi =
+            malloc (sizeof (struct user_pager_info));
+          upi->type = FILE_DATA;	/* NOTE(review): UPI is not checked for 0.  */
+          upi->node = node;
+          upi->max_prot = 0;	/* NOTE(review): PROT is not recorded on this path.  */
+          diskfs_nref_light (node);
+          node->dn->pager =
+            pager_create (upi, pager_bucket, MAY_CACHE,
+                          MEMORY_OBJECT_COPY_DELAY);
+          if (node->dn->pager == 0)
+            {
+              diskfs_nrele_light (node);
+              free (upi);
+              spin_unlock (&node_to_page_lock);
+              return MACH_PORT_NULL;
+            }
+
+          right = pager_get_port (node->dn->pager);
+          ports_port_deref (node->dn->pager);
+        }
+    }
+  while (right == MACH_PORT_NULL);
+  spin_unlock (&node_to_page_lock);
+  /* Make a send right under the same name; the result is not checked.  */
+  mach_port_insert_right (mach_task_self (), right, right,
+                          MACH_MSG_TYPE_MAKE_SEND);
+
+  return right;
+}
+
+/* Call this when we should turn off caching so that unused memory
+   object ports get freed.  */
+void
+drop_pager_softrefs (struct node *node)
+{
+  struct pager *pager;
+  /* Hold a port reference on the pager while using it without the lock.  */
+  spin_lock (&node_to_page_lock);
+  pager = node->dn->pager;
+  if (pager)
+    ports_port_ref (pager);
+  spin_unlock (&node_to_page_lock);
+  /* Nothing to turn off if caching is compiled out (MAY_CACHE is 0).  */
+  if (MAY_CACHE && pager)
+    pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0);
+  if (pager)
+    ports_port_deref (pager);
+}
+
+/* Call this when we should turn on caching because it's no longer
+   important for unused memory object ports to get freed.  */
+void
+allow_pager_softrefs (struct node *node)
+{
+  struct pager *pager;
+  /* Hold a port reference on the pager while using it without the lock.  */
+  spin_lock (&node_to_page_lock);
+  pager = node->dn->pager;
+  if (pager)
+    ports_port_ref (pager);
+  spin_unlock (&node_to_page_lock);
+  /* Nothing to turn on if caching is compiled out (MAY_CACHE is 0).  */
+  if (MAY_CACHE && pager)
+    pager_change_attributes (pager, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+  if (pager)
+    ports_port_deref (pager);
+}
+
+/* Call this to find out the struct pager * corresponding to the
+   FILE_DATA pager of node NODE.  This should be used *only* as a
+   subsequent argument to register_memory_fault_area, and will be
+   deleted when the kernel interface is fixed.  NODE must be
+   locked.  */
+struct pager *
+diskfs_get_filemap_pager_struct (struct node *node)
+{
+  /* This is safe because pager can't be cleared; there must be an
+     active mapping for this to be called. */
+  return node->dn->pager;
+}
+
+/* Shutdown all the pagers (except the disk pager). */
+void
+diskfs_shutdown_pager ()
+{
+  error_t shutdown_one (void *v_p)
+    {
+      struct pager *p = v_p;
+      if (p != diskfs_disk_pager)
+        pager_shutdown (p);
+      return 0;
+    }
+
+  write_all_disknodes ();
+  /* Applies shutdown_one to each port in the pager bucket.  */
+  ports_bucket_iterate (pager_bucket, shutdown_one);
+
+  pager_sync (diskfs_disk_pager, 1);
+
+  /* Despite the name of this function, we never actually shutdown the
+     disk pager, just make sure it's synced. */
+}
+
+/* Sync all the pagers.  WAIT is handed on to every pager_sync call.  */
+void
+diskfs_sync_everything (int wait)
+{
+  error_t sync_one (void *v_p)
+    {
+      struct pager *p = v_p;
+      if (p != diskfs_disk_pager)
+        pager_sync (p, wait);
+      return 0;
+    }
+  /* The disknodes first, then the other pagers, the disk pager last.  */
+  write_all_disknodes ();
+  ports_bucket_iterate (pager_bucket, sync_one);
+  pager_sync (diskfs_disk_pager, wait);
+}
+
+static void
+disable_caching ()
+{
+  error_t block_cache (void *arg)
+    {
+      struct pager *p = arg;
+      /* Caching off; the final 1 presumably requests a synchronous call.  */
+      pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1);
+      return 0;
+    }
+
+  /* Loop through the pagers and turn off caching one by one,
+     synchronously.  That should cause termination of each pager.  */
+  ports_bucket_iterate (pager_bucket, block_cache);
+}
+	  
+static void
+enable_caching ()
+{
+  error_t enable_cache (void *arg)
+    {
+      struct pager *p = arg;
+      struct user_pager_info *upi = pager_get_upi (p);
+      /* The reverse of block_cache in disable_caching: caching on again.  */
+      pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+
+      /* It's possible that we didn't have caching on before, because
+	 the user here is the only reference to the underlying node
+	 (actually, that's quite likely inside this particular
+	 routine), and if that node has no links.  So dinkle the node
+	 ref counting scheme here, which will cause caching to be
+	 turned off, if that's really necessary.  */
+      if (upi->type == FILE_DATA)
+	{
+	  diskfs_nref (upi->node);
+	  diskfs_nrele (upi->node);
+	}
+
+      return 0;
+    }
+
+  ports_bucket_iterate (pager_bucket, enable_cache);
+}
+	    
+/* Tell diskfs if there are pagers exported (returns 1), and if none
+   (returns 0), then prevent any new ones from showing up.  */
+int
+diskfs_pager_users ()
+{
+  int npagers = ports_count_bucket (pager_bucket);
+  /* One port is expected to be the disk pager, which does not count.  */
+  if (npagers <= 1)
+    return 0;
+
+  if (MAY_CACHE)
+    {
+      disable_caching ();
+      
+      /* Give it a second; the kernel doesn't actually shutdown
+	 immediately.  XXX */
+      sleep (1);
+      
+      npagers = ports_count_bucket (pager_bucket);
+      if (npagers <= 1)
+	return 0;
+
+      /* Darn, there are actual honest users.  Turn caching back on,
+	 and return failure.  */
+      enable_caching ();
+    }
+  /* Only on this path is the bucket enabled again.  */
+  ports_enable_bucket (pager_bucket);
+
+  return 1;
+}
+
+/* Return the bitwise or of the maximum prot parameter (the second arg
+   to diskfs_get_filemap) for all active user pagers.  */
+vm_prot_t
+diskfs_max_user_pager_prot ()
+{
+  vm_prot_t max_prot = 0;
+  int npagers = ports_count_bucket (pager_bucket);
+
+  if (npagers > 1)
+    /* More than just the disk pager.  */
+    {
+      error_t add_pager_max_prot (void *v_p)
+        {
+          struct pager *p = v_p;
+          struct user_pager_info *upi = pager_get_upi (p);
+          if (upi->type == FILE_DATA)
+            max_prot |= upi->max_prot;
+          /* Stop iterating if MAX_PROT is as filled as it is going to
+	     get.  */
+          return (max_prot
+		  == (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ? 1 : 0;
+        }
+
+      disable_caching ();               /* Make any silly pagers go away.  */
+
+      /* Give it a second; the kernel doesn't actually shutdown
+         immediately.  XXX */
+      sleep (1);
+      /* Collect the protections of the pagers that are still there.  */
+      ports_bucket_iterate (pager_bucket, add_pager_max_prot);
+
+      enable_caching ();
+    }
+  /* Unlike in diskfs_pager_users, the bucket is enabled on every path.  */
+  ports_enable_bucket (pager_bucket);
+
+  return max_prot;
+}
diff --git a/fatfs/virt-inode.c b/fatfs/virt-inode.c
new file mode 100644
index 00000000..d7c990d6
--- /dev/null
+++ b/fatfs/virt-inode.c
@@ -0,0 +1,235 @@
+/* Virtual Inode management routines
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Written by Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+/* TODO: Improve NEW by keeping a bitmap of free inodes.
+   TODO: Improve RLOOKUP by keeping an open hash for keys (need to change
+   CHANGE and FREE, too).
+   TODO: Improve FREE by keeping the highest inode in use and keep it
+   up-to-date. When a table page can be freed, do so.  */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <spin-lock.h>
+#include "virt-inode.h"
+
+/* Each virtual inode contains the UNIQUE key it belongs to,
+   which must not be zero.  */
+
+vi_key_t vi_zero_key = {0, 0};
+
+struct v_inode
+{
+  vi_key_t key;
+};
+
+/* All inodes are stored in a table by their index number - 1.
+   Decrementing by one is necessary because inode numbers start from 1,
+   but our table is zero based.  */
+
+#define LOG2_TABLE_PAGE_SIZE 10
+#define TABLE_PAGE_SIZE (1 << LOG2_TABLE_PAGE_SIZE)
+
+struct table_page
+{
+  struct table_page *next;
+
+  struct v_inode vi[TABLE_PAGE_SIZE];
+};
+
+struct table_page *inode_table;
+
+spin_lock_t inode_table_lock = SPIN_LOCK_INITIALIZER;
+
+/* Enter KEY into the first free slot of the inode table, appending a
+   new table page if all existing pages are full.  The inode number
+   is returned in *INODE and the slot in *V_INODE.  Returns 0 on
+   success and ENOSPC if no memory for a new page is available.  The
+   callers (vi_new and vi_rlookup) hold inode_table_lock.  */
+error_t
+_vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode)
+{
+  struct table_page **link = &inode_table;
+  struct table_page *tp;
+  int page_nr = 0;
+  int slot;
+
+  /* A slot is free if it holds the zero key; take the first such.  */
+  for (tp = inode_table; tp; tp = tp->next)
+    {
+      for (slot = 0; slot < TABLE_PAGE_SIZE; slot++)
+	if (!memcmp (&vi_zero_key, &tp->vi[slot].key, sizeof (vi_key_t)))
+	  {
+	    tp->vi[slot].key = key;
+	    /* Inode numbers start from 1, slots from 0.  */
+	    *inode = (page_nr << LOG2_TABLE_PAGE_SIZE) + slot + 1;
+	    *v_inode = &tp->vi[slot];
+	    return 0;
+	  }
+
+      /* This page is full.  Remember where a new page would have to
+	 be linked in, and go on with the next one.  */
+      link = &tp->next;
+      page_nr++;
+    }
+
+  /* No free slot anywhere (or no page at all yet): append a zeroed
+     page and hand out its first slot.  */
+  tp = malloc (sizeof (struct table_page));
+  *link = tp;
+  if (!tp)
+    {
+      return ENOSPC;
+    }
+
+  memset (tp, 0, sizeof (struct table_page));
+  tp->vi[0].key = key;
+
+  /* Inode numbers start from 1, slots from 0.  */
+  *inode = (page_nr << LOG2_TABLE_PAGE_SIZE) + 1;
+  *v_inode = &tp->vi[0];
+
+  return 0;
+}
+
+/* Allocate a new inode number INODE for KEY (which must not be the zero
+   key) and return it as well as the virtual inode V_INODE.  Return 0 on
+   success, otherwise an error value (ENOSPC).  */
+error_t
+vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode)
+{
+  error_t err;
+
+  assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t)));
+
+  spin_lock (&inode_table_lock);
+  err = _vi_new(key, inode, v_inode);
+  spin_unlock (&inode_table_lock);
+
+  return err;
+}
+
+/* Get the key for virtual inode V_INODE.  The table lock is not taken.  */
+vi_key_t
+vi_key(inode_t v_inode)
+{
+  return v_inode->key;
+}
+
+/* Get the inode V_INODE belonging to inode number INODE.  Returns 0 if
+   the table has no slot for INODE; a free slot is returned as it is.  */
+inode_t
+vi_lookup(ino_t inode)
+{
+  struct table_page *table;
+  /* See above for rationale of decrement. */
+  int page = (inode - 1) >> LOG2_TABLE_PAGE_SIZE;
+  int offset = (inode - 1) & (TABLE_PAGE_SIZE - 1);
+  inode_t v_inode = 0;
+
+  spin_lock (&inode_table_lock);
+
+  table = inode_table;	/* Must be read with the lock held.  */
+  while (table && page > 0)
+    {
+      page--;
+      table = table->next;
+    }
+
+  if (table)
+    v_inode = &table->vi[offset];
+
+  spin_unlock (&inode_table_lock);
+
+  return v_inode;
+}
+
+/* Get the inode number and virtual inode belonging to key KEY.
+   Returns 0 on success and EINVAL if no inode is found for KEY and
+   CREATE is false. Otherwise, if CREATE is true, allocate new inode.  */
+error_t
+vi_rlookup(vi_key_t key, ino_t *inode, inode_t *v_inode, int create)
+{
+  error_t err = 0;
+  struct table_page *table;
+  int page = 0;
+  int offset = 0;
+
+  assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t)));
+
+  spin_lock (&inode_table_lock);
+  table = inode_table;	/* Must be read with the lock held.  */
+  while (table && memcmp(&table->vi[offset].key, &key, sizeof (vi_key_t)))
+    {
+      offset++;
+      if (offset == TABLE_PAGE_SIZE)
+	{
+	  offset = 0;
+	  page++;
+	  table = table->next;
+	}
+    }
+
+  if (table)
+    {
+      /* See above for rationale of increment. */
+      *inode = (page << LOG2_TABLE_PAGE_SIZE) + offset + 1;
+      *v_inode = &table->vi[offset];
+    }
+  else
+    {
+      if (create)
+	err = _vi_new (key, inode, v_inode);
+      else
+	err = EINVAL;
+    }
+
+  spin_unlock (&inode_table_lock);
+
+  return err;
+}
+
+/* Change the key of virtual inode V_INODE to KEY (not the zero key) and
+   return the old key.  NOTE(review): unlike vi_free, no lock is taken.  */
+vi_key_t vi_change(inode_t v_inode, vi_key_t key)
+{
+  vi_key_t okey = v_inode->key;
+
+  assert (memcmp(&vi_zero_key, &key, sizeof (vi_key_t)));
+  v_inode->key = key;
+  return okey;
+}
+
+/* Release virtual inode V_INODE, freeing the inode number.  Return
+   the key.  */
+vi_key_t vi_free(inode_t v_inode)
+{
+  vi_key_t key;
+  spin_lock (&inode_table_lock);
+  key = v_inode->key;
+  v_inode->key = vi_zero_key;	/* The zero key marks a free slot.  */
+  spin_unlock (&inode_table_lock);
+  return key;
+}
+
+
+
+
+
+
diff --git a/fatfs/virt-inode.h b/fatfs/virt-inode.h
new file mode 100644
index 00000000..5b889d23
--- /dev/null
+++ b/fatfs/virt-inode.h
@@ -0,0 +1,69 @@
+/* virt-inode.h - Public interface for the virtual inode management routines.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Written by Marcus Brinkmann.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. */
+
+#ifndef VIRT_INODE_H
+#define VIRT_INODE_H
+
+#include <errno.h>
+#include <dirent.h>
+
+/* Define struct vi_key to match your needs.  It is passed by copy,
+   so don't make it too huge.  Equality is tested with memcmp, because
+   the C == operator doesn't work on structs.  */
+
+struct vi_key
+{
+  ino_t dir_inode;	/* presumably the containing directory's inode -- confirm */
+  int dir_offset;	/* presumably the entry's offset in that directory -- confirm */
+};
+
+typedef struct vi_key vi_key_t;
+/* Key stored in released slots (see vi_free); real keys must differ.  */
+extern vi_key_t vi_zero_key;
+/* Handle to a virtual inode; struct v_inode is not defined here.  */
+typedef struct v_inode *inode_t;
+
+/* Allocate a new inode number INODE for KEY and return it as well as
+   the virtual inode V_INODE. Return 0 on success, otherwise an error
+   value (ENOSPC).  */
+error_t vi_new(vi_key_t key, ino_t *inode, inode_t *v_inode);
+
+/* Get the key for virtual inode V_INODE. */
+vi_key_t vi_key(inode_t v_inode);
+
+/* Get the inode V_INODE belonging to inode number INODE.
+   Returns 0 if this inode number is free.  */
+inode_t vi_lookup(ino_t inode);
+
+/* Get the inode number and virtual inode belonging to key KEY, which
+   must not be vi_zero_key.  Returns 0 on success and EINVAL if no
+   inode is found for KEY and CREATE is false.  Otherwise, if CREATE
+   is true, allocate a new inode.  */
+error_t vi_rlookup(vi_key_t key, ino_t *inode, inode_t *v_inode, int create);
+
+/* Change the key of virtual inode V_INODE to KEY (which must not be
+   vi_zero_key) and return the old key.  */
+vi_key_t vi_change(inode_t v_inode, vi_key_t key);
+
+/* Release virtual inode V_INODE, freeing the inode number.  Return
+   the key.  */
+vi_key_t vi_free(inode_t v_inode);
+
+#endif
-- 
cgit v1.2.3