/* Node state and file contents for tmpfs. Copyright (C) 2000,01,02 Free Software Foundation, Inc. This file is part of the GNU Hurd. The GNU Hurd is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. The GNU Hurd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the GNU Hurd; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "tmpfs.h" #include <stddef.h> #include <stdlib.h> #include <fcntl.h> #include <hurd/hurd_types.h> #include <hurd/store.h> #include "default_pager_U.h" #include "libdiskfs/fs_S.h" unsigned int num_files; static unsigned int gen; /* all_nodes is a list of all nodes. Access to all_nodes and all_nodes_nr_items is protected by all_nodes_lock. Every node in all_nodes carries a light reference. When we are asked to give up that light reference, we reacquire our lock momentarily to check whether someone else reacquired a reference. */ struct node *all_nodes; static size_t all_nodes_nr_items; pthread_rwlock_t all_nodes_lock = PTHREAD_RWLOCK_INITIALIZER; error_t diskfs_alloc_node (struct node *dp, mode_t mode, struct node **npp) { struct disknode *dn; dn = calloc (1, sizeof *dn); if (dn == 0) return ENOSPC; if (round_page (get_used () + sizeof *dn) / vm_page_size > tmpfs_page_limit) { pthread_rwlock_unlock (&all_nodes_lock); free (dn); return ENOSPC; } dn->gen = gen++; __atomic_add_fetch (&num_files, 1, __ATOMIC_RELAXED); adjust_used (sizeof *dn); dn->type = IFTODT (mode & S_IFMT); return diskfs_cached_lookup ((ino_t) (uintptr_t) dn, npp); } void diskfs_free_node (struct node *np, mode_t mode) { switch (np->dn->type) { case DT_REG: if (np->dn->u.reg.memobj != MACH_PORT_NULL) { vm_deallocate (mach_task_self (), np->dn->u.reg.memref, 4096); mach_port_deallocate (mach_task_self (), np->dn->u.reg.memobj); } break; case DT_DIR: assert (np->dn->u.dir.entries == 0); break; case DT_LNK: free (np->dn->u.lnk); break; } pthread_rwlock_wrlock (&all_nodes_lock); *np->dn->hprevp = np->dn->hnext; if (np->dn->hnext != 0) np->dn->hnext->dn->hprevp = np->dn->hprevp; all_nodes_nr_items -= 1; pthread_rwlock_unlock (&all_nodes_lock); free (np->dn); np->dn = 0; __atomic_sub_fetch (&num_files, 1, __ATOMIC_RELAXED); adjust_used (-sizeof *np->dn); } void diskfs_node_norefs (struct node *np) { if (np->dn != 0) { /* We don't bother to do this in diskfs_write_disknode, since it only ever matters here. The node state goes back into the `struct disknode' while it has no associated diskfs node. */ np->dn->size = np->dn_stat.st_size; np->dn->mode = np->dn_stat.st_mode; np->dn->nlink = np->dn_stat.st_nlink; np->dn->uid = np->dn_stat.st_uid; np->dn->author = np->dn_stat.st_author; np->dn->gid = np->dn_stat.st_gid; np->dn->atime = np->dn_stat.st_atim; np->dn->mtime = np->dn_stat.st_mtim; np->dn->ctime = np->dn_stat.st_ctim; np->dn->flags = np->dn_stat.st_flags; switch (np->dn->type) { case DT_REG: assert (np->allocsize % vm_page_size == 0); np->dn->u.reg.allocpages = np->allocsize / vm_page_size; break; case DT_CHR: case DT_BLK: np->dn->u.chr = np->dn_stat.st_rdev; break; } } free (np); } static void recompute_blocks (struct node *np) { struct disknode *const dn = np->dn; struct stat *const st = &np->dn_stat; st->st_blocks = sizeof *dn + dn->translen; switch (dn->type) { case DT_REG: np->allocsize = dn->u.reg.allocpages * vm_page_size; st->st_blocks += np->allocsize; break; case DT_LNK: st->st_blocks += st->st_size + 1; break; case DT_CHR: case DT_BLK: st->st_rdev = dn->u.chr; break; case DT_DIR: st->st_blocks += dn->size; break; } st->st_blocks = (st->st_blocks + 511) / 512; } /* Fetch inode INUM, set *NPP to the node structure; gain one user reference and lock the node. */ error_t diskfs_cached_lookup (ino_t inum, struct node **npp) { struct disknode *dn = (void *) (uintptr_t) inum; struct node *np; assert (npp); pthread_rwlock_rdlock (&all_nodes_lock); if (dn->hprevp != 0) /* There is already a node. */ goto gotit; else /* Create the new node. */ { struct stat *st; pthread_rwlock_unlock (&all_nodes_lock); np = diskfs_make_node (dn); np->cache_id = (ino_t) (uintptr_t) dn; pthread_rwlock_wrlock (&all_nodes_lock); if (dn->hprevp != NULL) { /* We lost a race. */ diskfs_nrele (np); goto gotit; } dn->hnext = all_nodes; if (dn->hnext) dn->hnext->dn->hprevp = &dn->hnext; dn->hprevp = &all_nodes; all_nodes = np; all_nodes_nr_items += 1; diskfs_nref_light (np); pthread_rwlock_unlock (&all_nodes_lock); st = &np->dn_stat; memset (st, 0, sizeof *st); st->st_fstype = FSTYPE_MEMFS; st->st_fsid = getpid (); st->st_blksize = vm_page_size; st->st_ino = (ino_t) (uintptr_t) dn; st->st_gen = dn->gen; st->st_size = dn->size; st->st_mode = dn->mode; st->st_nlink = dn->nlink; st->st_uid = dn->uid; st->st_author = dn->author; st->st_gid = dn->gid; st->st_atim = dn->atime; st->st_mtim = dn->mtime; st->st_ctim = dn->ctime; st->st_flags = dn->flags; st->st_rdev = 0; np->allocsize = 0; recompute_blocks (np); } pthread_mutex_lock (&np->lock); *npp = np; return 0; gotit: np = *dn->hprevp; assert (np->dn == dn); assert (*dn->hprevp == np); diskfs_nref (np); pthread_rwlock_unlock (&all_nodes_lock); pthread_mutex_lock (&np->lock); *npp = np; return 0; } error_t diskfs_node_iterate (error_t (*fun) (struct node *)) { error_t err = 0; size_t num_nodes; struct node *node, **node_list, **p; pthread_rwlock_rdlock (&all_nodes_lock); /* We must copy everything from the hash table into another data structure to avoid running into any problems with the hash-table being modified during processing (normally we delegate access to hash-table with all_nodes_lock, but we can't hold this while locking the individual node locks). */ num_nodes = all_nodes_nr_items; p = node_list = alloca (num_nodes * sizeof (struct node *)); for (node = all_nodes; node != 0; node = node->dn->hnext) { *p++ = node; /* We acquire a hard reference for node, but without using diskfs_nref. We do this so that diskfs_new_hardrefs will not get called. */ refcounts_ref (&node->refcounts, NULL); } pthread_rwlock_unlock (&all_nodes_lock); p = node_list; while (num_nodes-- > 0) { node = *p++; if (!err) { pthread_mutex_lock (&node->lock); err = (*fun) (node); pthread_mutex_unlock (&node->lock); } diskfs_nrele (node); } return err; } /* The user must define this function. Node NP has some light references, but has just lost its last hard references. Take steps so that if any light references can be freed, they are. NP is locked as is the pager refcount lock. This function will be called after diskfs_lost_hardrefs. */ void diskfs_try_dropping_softrefs (struct node *np) { pthread_rwlock_wrlock (&all_nodes_lock); if (np->cache_id != 0) { /* Check if someone reacquired a reference. */ struct references result; refcounts_references (&np->refcounts, &result); if (result.hard > 0) { /* A reference was reacquired. It's fine, we didn't touch anything yet. */ pthread_rwlock_unlock (&all_nodes_lock); return; } /* Just let go of the weak reference. The node will be removed from all_nodes in diskfs_free_node. */ np->cache_id = 0; diskfs_nrele_light (np); } pthread_rwlock_unlock (&all_nodes_lock); } /* The user must define this funcction. Node NP has some light references but has just lost its last hard reference. NP is locked. */ void diskfs_lost_hardrefs (struct node *np) { } /* The user must define this function. Node NP has just acquired a hard reference where it had none previously. It is thus now OK again to have light references without real users. NP is locked. */ void diskfs_new_hardrefs (struct node *np) { } error_t diskfs_get_translator (struct node *np, char **namep, u_int *namelen) { *namelen = np->dn->translen; if (*namelen == 0) return 0; *namep = malloc (*namelen); if (*namep == 0) return ENOMEM; memcpy (*namep, np->dn->trans, *namelen); return 0; } error_t diskfs_set_translator (struct node *np, const char *name, u_int namelen, struct protid *cred) { char *new; if (namelen == 0) { free (np->dn->trans); new = 0; np->dn_stat.st_mode &= ~S_IPTRANS; } else { new = realloc (np->dn->trans, namelen); if (new == 0) return ENOSPC; memcpy (new, name, namelen); np->dn_stat.st_mode |= S_IPTRANS; } adjust_used (namelen - np->dn->translen); np->dn->trans = new; np->dn->translen = namelen; recompute_blocks (np); return 0; } static error_t create_symlink_hook (struct node *np, const char *target) { assert (np->dn->u.lnk == 0); np->dn_stat.st_size = strlen (target); if (np->dn_stat.st_size > 0) { const size_t size = np->dn_stat.st_size + 1; np->dn->u.lnk = malloc (size); if (np->dn->u.lnk == 0) return ENOSPC; memcpy (np->dn->u.lnk, target, size); np->dn->type = DT_LNK; adjust_used (size); recompute_blocks (np); } return 0; } error_t (*diskfs_create_symlink_hook)(struct node *np, const char *target) = create_symlink_hook; static error_t read_symlink_hook (struct node *np, char *target) { memcpy (target, np->dn->u.lnk, np->dn_stat.st_size + 1); return 0; } error_t (*diskfs_read_symlink_hook)(struct node *np, char *target) = read_symlink_hook; void diskfs_write_disknode (struct node *np, int wait) { } void diskfs_file_update (struct node *np, int wait) { diskfs_node_update (np, wait); } error_t diskfs_node_reload (struct node *node) { return 0; } /* The user must define this function. Truncate locked node NP to be SIZE bytes long. (If NP is already less than or equal to SIZE bytes long, do nothing.) If this is a symlink (and diskfs_shortcut_symlink is set) then this should clear the symlink, even if diskfs_create_symlink_hook stores the link target elsewhere. */ error_t diskfs_truncate (struct node *np, off_t size) { if (np->dn->type == DT_LNK) { free (np->dn->u.lnk); adjust_used (size - np->dn_stat.st_size); np->dn->u.lnk = 0; np->dn_stat.st_size = size; return 0; } if (np->allocsize <= size) return 0; assert (np->dn->type == DT_REG); if (default_pager == MACH_PORT_NULL) return EIO; np->dn_stat.st_size = size; off_t set_size = size; size = round_page (size); if (np->dn->u.reg.memobj != MACH_PORT_NULL) { error_t err = default_pager_object_set_size (np->dn->u.reg.memobj, set_size); if (err == MIG_BAD_ID) /* This is an old default pager. We have no way to truncate the memory object. Note that the behavior here will be wrong in two ways: user accesses past the end won't fault; and, more importantly, later growing the file won't zero the contents past the size we just supposedly truncated to. For proper behavior, use a new default pager. */ return 0; if (err) return err; } /* Otherwise it never had any real contents. */ adjust_used (size - np->allocsize); np->dn_stat.st_blocks += (size - np->allocsize) / 512; np->allocsize = size; return 0; } /* The user must define this function. Grow the disk allocated to locked node NP to be at least SIZE bytes, and set NP->allocsize to the actual allocated size. (If the allocated size is already SIZE bytes, do nothing.) CRED identifies the user responsible for the call. */ error_t diskfs_grow (struct node *np, off_t size, struct protid *cred) { assert (np->dn->type == DT_REG); if (np->allocsize >= size) return 0; off_t set_size = size; size = round_page (size); if (round_page (get_used () + size - np->allocsize) / vm_page_size > tmpfs_page_limit) return ENOSPC; if (default_pager == MACH_PORT_NULL) return EIO; if (np->dn->u.reg.memobj != MACH_PORT_NULL) { /* Increase the limit the memory object will allow to be accessed. */ error_t err = default_pager_object_set_size (np->dn->u.reg.memobj, set_size); if (err == MIG_BAD_ID) /* Old default pager, never limited it. */ err = 0; if (err) return err; } adjust_used (size - np->allocsize); np->dn_stat.st_blocks += (size - np->allocsize) / 512; np->allocsize = size; return 0; } mach_port_t diskfs_get_filemap (struct node *np, vm_prot_t prot) { error_t err; if (np->dn->type != DT_REG) { errno = EOPNOTSUPP; /* ? */ return MACH_PORT_NULL; } if (default_pager == MACH_PORT_NULL) { errno = EIO; return MACH_PORT_NULL; } /* We don't bother to create the memory object until the first time we need it (i.e. first mapping or i/o). This way we might have a clue what size it's going to be beforehand, so we can tell the default pager how big to make its bitmaps. This is just an optimization for the default pager; the memory object can be expanded at any time just by accessing more of it. (It also optimizes the case of empty files so we might never make a memory object at all.) */ if (np->dn->u.reg.memobj == MACH_PORT_NULL) { error_t err = default_pager_object_create (default_pager, &np->dn->u.reg.memobj, np->allocsize); if (err) { errno = err; return MACH_PORT_NULL; } assert (np->dn->u.reg.memobj != MACH_PORT_NULL); /* XXX we need to keep a reference to the object, or GNU Mach will terminate it when we release the map. */ np->dn->u.reg.memref = 0; vm_map (mach_task_self (), &np->dn->u.reg.memref, 4096, 0, 1, np->dn->u.reg.memobj, 0, 0, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE); assert_perror (err); } /* XXX always writable */ /* Add a reference for each call, the caller will deallocate it. */ err = mach_port_mod_refs (mach_task_self (), np->dn->u.reg.memobj, MACH_PORT_RIGHT_SEND, +1); assert_perror (err); return np->dn->u.reg.memobj; } /* The user must define this function. Return a `struct pager *' suitable for use as an argument to diskfs_register_memory_fault_area that refers to the pager returned by diskfs_get_filemap for node NP. NP is locked. */ struct pager * diskfs_get_filemap_pager_struct (struct node *np) { return 0; } /* We have no pager of our own, so there is no need to worry about users of it, or to shut it down. */ int diskfs_pager_users () { return 0; } void diskfs_shutdown_pager () { } /* The purpose of this is to decide that it's ok to make the fs read-only. Turning a temporary filesystem read-only seem pretty useless. */ vm_prot_t diskfs_max_user_pager_prot () { return VM_PROT_READ; /* Probable lie that lets us go read-only. */ } error_t diskfs_S_file_get_storage_info (struct protid *cred, mach_port_t **ports, mach_msg_type_name_t *ports_type, mach_msg_type_number_t *num_ports, int **ints, mach_msg_type_number_t *num_ints, off_t **offsets, mach_msg_type_number_t *num_offsets, char **data, mach_msg_type_number_t *data_len) { mach_port_t memobj = diskfs_get_filemap (cred->po->np, VM_PROT_ALL); if (memobj == MACH_PORT_NULL) return errno; assert (*num_ports >= 1); /* mig always gives us some */ *num_ports = 1; *ports_type = MACH_MSG_TYPE_MOVE_SEND; (*ports)[0] = (cred->po->openstat & O_RDWR) == O_RDWR ? memobj : MACH_PORT_NULL; assert (*num_offsets >= 2); /* mig always gives us some */ *num_offsets = 2; (*offsets)[0] = 0; (*offsets)[1] = cred->po->np->dn_stat.st_size; assert (*num_ints >= 6); /* mig always gives us some */ *num_ints = 6; (*ints)[0] = STORAGE_MEMORY; (*ints)[1] = (cred->po->openstat & O_WRITE) ? 0 : STORE_READONLY; (*ints)[2] = 1; /* block size */ (*ints)[3] = 1; /* 1 run in offsets list */ (*ints)[4] = 0; /* name len */ (*ints)[5] = 0; /* misc len */ *data_len = 0; return 0; }