diff options
Diffstat (limited to 'storeio')
-rw-r--r-- | storeio/dev.c | 414 | ||||
-rw-r--r-- | storeio/dev.h | 101 | ||||
-rw-r--r-- | storeio/io.c | 329 | ||||
-rw-r--r-- | storeio/open.c | 124 | ||||
-rw-r--r-- | storeio/open.h | 68 | ||||
-rw-r--r-- | storeio/pager.c | 264 | ||||
-rw-r--r-- | storeio/storeio.c | 328 |
7 files changed, 1628 insertions, 0 deletions
diff --git a/storeio/dev.c b/storeio/dev.c new file mode 100644 index 00000000..d9a4e083 --- /dev/null +++ b/storeio/dev.c @@ -0,0 +1,414 @@ +/* store `device' I/O + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <hurd.h> +#include <assert.h> +#include <string.h> +#include <hurd/pager.h> +#include <hurd/store.h> + +#include "dev.h" + +/* These functions deal with the buffer used for doing non-block-aligned I/O. */ + +static inline int +dev_buf_is_active (struct dev *dev) +{ + return dev->buf_offs >= 0; +} + +/* Invalidate DEV's buffer, writing it to disk if necessary. */ +static error_t +dev_buf_discard (struct dev *dev) +{ + if (dev_buf_is_active (dev)) + { + if (dev->buf_dirty) + { + size_t amount; + struct store *store = dev->store; + error_t err = + store_write (store, dev->buf_offs >> store->log2_block_size, + dev->buf, store->block_size, &amount); + if (amount < store->block_size) + err = EIO; + if (err) + return err; + dev->buf_dirty = 0; + } + dev->buf_offs = -1; + } + return 0; +} + +/* Make DEV's buffer active, reading the block from DEV's store which + contains OFFS. 
*/
+static error_t
+dev_buf_fill (struct dev *dev, off_t offs)
+{
+  error_t err;
+  unsigned block_mask = dev->block_mask;
+  char *buf = dev->buf;
+  struct store *store = dev->store;
+  size_t buf_len = store->block_size;
+
+  /* Note that the `else' below pairs with the inner `if': a different
+     block is in the buffer, so it is flushed before being replaced.  */
+  if (dev_buf_is_active (dev))
+    if ((dev->buf_offs & ~block_mask) == (offs & ~block_mask))
+      return 0;                 /* Correct block already in buffer.  */
+    else
+      {
+        err = dev_buf_discard (dev);
+        if (err)
+          return err;
+      }
+
+  /* BUF and BUF_LEN start out describing DEV's own buffer and are passed
+     by reference; BUF may come back pointing elsewhere (handled below).
+     NOTE(review): BUF_LEN is not examined afterwards, so a short read is
+     not detected here.  */
+  err = store_read (store, offs >> store->log2_block_size, store->block_size,
+                    &buf, &buf_len);
+  if (err)
+    return err;
+
+  if (buf != dev->buf)
+    /* The read returned its data in a different buffer; release the old
+       one and adopt the new one as DEV's buffer.  */
+    {
+      vm_deallocate (mach_task_self (),
+                     (vm_address_t)dev->buf, store->block_size);
+      dev->buf = buf;
+    }
+
+  dev->buf_offs = offs & ~block_mask;
+
+  return 0;
+}
+
+/* Do an in-buffer partial-block I/O operation.  BUF_OFFS is the offset
+   within DEV's buffer at which to start; BUF_RW does the actual copy.
+   *IO_OFFS is advanced, and *LEN reduced, by the amount transferred.  If
+   the transfer reaches the end of the block, the buffer is discarded
+   (written back if dirty) afterwards.  */
+static error_t
+dev_buf_rw (struct dev *dev, size_t buf_offs, size_t *io_offs, size_t *len,
+            inline error_t (*const buf_rw) (size_t buf_offs,
+                                            size_t io_offs, size_t len))
+{
+  size_t block_size = dev->store->block_size;
+
+  assert (dev_buf_is_active (dev));
+
+  if (buf_offs + *len >= block_size)
+    /* Only part of BUF lies within the buffer (or everything up
+       to the end of the block, in which case we want to flush
+       the buffer anyway).  */
+    {
+      size_t buf_len = block_size - buf_offs;
+      error_t err = (*buf_rw) (buf_offs, *io_offs, buf_len);
+      if (err)
+        return err;
+      *io_offs += buf_len;
+      *len -= buf_len;
+      return dev_buf_discard (dev);
+    }
+  else
+    /* All I/O is within the block.  */
+    {
+      error_t err = (*buf_rw) (buf_offs, *io_offs, *len);
+      if (err)
+        return err;
+      *io_offs += *len;
+      *len = 0;
+      return 0;
+    }
+}
+
+/* Returns a pointer to a new device structure in DEV for the kernel device
+   NAME, with the given FLAGS.  If BLOCK_SIZE is non-zero, it should be the
+   desired block size, and must be a multiple of the device block size.
+   If an error occurs, the error code is returned, otherwise 0.
*/ +error_t +dev_open (struct store_parsed *name, int flags, struct dev **dev) +{ + error_t err; + struct dev *new = malloc (sizeof (struct dev)); + + if (! new) + return ENOMEM; + + err = store_parsed_open (name, flags, 0, &new->store); + if (err) + { + free (new); + return err; + } + +#if 0 /* valloc doesn't work */ + new->buf = valloc (new->store->block_size); + if (new->buf == 0) +#else + if (vm_allocate (mach_task_self (), + (vm_address_t *)&new->buf, new->store->block_size, 1)) +#endif + { + store_free (new->store); + free (new); + return ENOMEM; + } + + new->buf_offs = -1; + rwlock_init (&new->io_lock); + new->owner = 0; + new->block_mask = (1 << new->store->log2_block_size) - 1; + new->pager = 0; + mutex_init (&new->pager_lock); + *dev = new; + + return 0; +} + +/* Free DEV and any resources it consumes. */ +void +dev_close (struct dev *dev) +{ + if (dev->pager != NULL) + pager_shutdown (dev->pager); + + dev_buf_discard (dev); + +#if 0 + free (dev->buf); +#else + vm_deallocate (mach_task_self (), + (vm_address_t)dev->buf, dev->store->block_size); +#endif + + store_free (dev->store); + + free (dev); +} + +/* Try and write out any pending writes to DEV. If WAIT is true, will wait + for any paging activity to cease. */ +error_t +dev_sync(struct dev *dev, int wait) +{ + error_t err; + + /* Sync any paged backing store. */ + if (dev->pager != NULL) + pager_sync (dev->pager, wait); + + rwlock_writer_lock (&dev->io_lock); + err = dev_buf_discard (dev); + rwlock_writer_unlock (&dev->io_lock); + + return err; +} + +/* Takes care of buffering I/O to/from DEV for a transfer at position OFFS, + length LEN; the amount of I/O sucessfully done is returned in AMOUNT. + BUF_RW is called to do I/O that's entirely inside DEV's internal buffer, + and RAW_RW to do I/O directly to DEV's store. 
*/
+/* The transfer is done in up to three steps while holding DEV's I/O lock
+   for writing: an unaligned head through the buffer, whole blocks through
+   RAW_RW, and a tail shorter than a block through the buffer again.
+   *AMOUNT is only set when no error occurred.  */
+static inline error_t
+buffered_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+             inline error_t (* const buf_rw) (size_t buf_offs,
+                                              size_t io_offs, size_t len),
+             inline error_t (* const raw_rw) (off_t offs,
+                                              size_t io_offs, size_t len,
+                                              size_t *amount))
+{
+  error_t err = 0;
+  unsigned block_mask = dev->block_mask;
+  unsigned block_size = dev->store->block_size;
+  size_t io_offs = 0;           /* Offset within this I/O operation.  */
+  unsigned block_offs = offs & block_mask; /* Offset within a block.  */
+
+  rwlock_writer_lock (&dev->io_lock);
+
+  if (block_offs != 0)
+    /* The start of the I/O isn't block aligned.  */
+    {
+      err = dev_buf_fill (dev, offs);
+      if (! err)
+        err = dev_buf_rw (dev, block_offs, &io_offs, &len, buf_rw);
+    }
+
+  if (!err && len > 0)
+    /* Now the I/O should be block aligned.  */
+    {
+      if (len >= block_size)
+        {
+          /* This local AMOUNT shadows the AMOUNT parameter; it receives
+             the number of bytes transferred by RAW_RW.  */
+          size_t amount;
+          /* Flush the buffer first so the raw transfer below does not
+             bypass a block that is still held in it.  */
+          err = dev_buf_discard (dev);
+          if (! err)
+            err =
+              (*raw_rw) (offs + io_offs, io_offs, len & ~block_mask, &amount);
+          if (! err)
+            {
+              io_offs += amount;
+              len -= amount;
+            }
+        }
+      if (len > 0 && len < block_size)
+        /* All full blocks were written successfully, so write
+           the tail end into the buffer.  */
+        {
+          err = dev_buf_fill (dev, offs + io_offs);
+          if (! err)
+            err = dev_buf_rw (dev, 0, &io_offs, &len, buf_rw);
+        }
+    }
+
+  if (! err)
+    *amount = io_offs;
+
+  rwlock_writer_unlock (&dev->io_lock);
+
+  return err;
+}
+
+/* Takes care of buffering I/O to/from DEV for a transfer at position OFFS,
+   length LEN, and direction DIR.  BUF_RW is called to do I/O to/from data
+   buffered in DEV, and RAW_RW to do I/O directly to DEV's store.
*/
+/* Returns EINVAL if OFFS is negative and EIO if it lies beyond the end of
+   the store; a transfer that would run past the end of the store is
+   shortened to end there.  The amount transferred is returned in
+   AMOUNT.  */
+static inline error_t
+dev_rw (struct dev *dev, off_t offs, size_t len, size_t *amount,
+        inline error_t (* const buf_rw) (size_t buf_offs,
+                                         size_t io_offs, size_t len),
+        inline error_t (* const raw_rw) (off_t offs,
+                                         size_t io_offs, size_t len,
+                                         size_t *amount))
+{
+  error_t err;
+  unsigned block_mask = dev->block_mask;
+
+  if (offs < 0)
+    return EINVAL;
+  else if (offs > dev->store->size)
+    return EIO;
+  else if (offs + len > dev->store->size)
+    len = dev->store->size - offs;
+
+  rwlock_reader_lock (&dev->io_lock);
+  if (dev_buf_is_active (dev)
+      || (offs & block_mask) != 0 || (len & block_mask) != 0)
+    /* Some non-aligned I/O has been done, or is needed, so we need to deal
+       with DEV's buffer, which means getting an exclusive lock.  */
+    {
+      /* Acquire a writer lock instead of a reader lock.  Note that other
+         writers may have acquired the lock by the time we get it.
+         (buffered_rw takes and releases the writer lock itself.)  */
+      rwlock_reader_unlock (&dev->io_lock);
+      err = buffered_rw (dev, offs, len, amount, buf_rw, raw_rw);
+    }
+  else
+    /* Only block-aligned I/O is being done, so things are easy.  */
+    {
+      err = (*raw_rw) (offs, 0, len, amount);
+      rwlock_reader_unlock (&dev->io_lock);
+    }
+
+  return err;
+}
+
+/* Write LEN bytes from BUF to DEV, returning the amount actually written in
+   AMOUNT.  If successful, 0 is returned, otherwise an error code is
+   returned.  */
+error_t
+dev_write (struct dev *dev, off_t offs, char *buf, size_t len,
+           size_t *amount)
+{
+  /* Copy LEN bytes from the caller's BUF at IO_OFFS into DEV's block
+     buffer at BUF_OFFS, and mark the buffer dirty.  */
+  error_t buf_write (size_t buf_offs, size_t io_offs, size_t len)
+    {
+      bcopy (buf + io_offs, dev->buf + buf_offs, len);
+      dev->buf_dirty = 1;
+      return 0;
+    }
+  /* Write LEN bytes from the caller's BUF at IO_OFFS directly to the
+     store at byte offset OFFS (converted to a block number).  */
+  error_t raw_write (off_t offs, size_t io_offs, size_t len, size_t *amount)
+    {
+      struct store *store = dev->store;
+      return
+        store_write (store, offs >> store->log2_block_size,
+                     buf + io_offs, len, amount);
+    }
+
+  return dev_rw (dev, offs, len, amount, buf_write, raw_write);
+}
+
+/* Read up to WHOLE_AMOUNT bytes from DEV, returned in BUF and LEN in the
+   with the usual mach memory result semantics.
If successful, 0 is + returned, otherwise an error code is returned. */ +error_t +dev_read (struct dev *dev, off_t offs, size_t whole_amount, + char **buf, size_t *len) +{ + error_t err; + int allocated_buf = 0; + error_t ensure_buf () + { + error_t err; + if (*len < whole_amount) + { + err = vm_allocate (mach_task_self (), + (vm_address_t *)buf, whole_amount, 1); + if (! err) + allocated_buf = 1; + } + else + err = 0; + return err; + } + error_t buf_read (size_t buf_offs, size_t io_offs, size_t len) + { + error_t err = ensure_buf (); + if (! err) + bcopy (dev->buf + buf_offs, *buf + io_offs, len); + return err; + } + error_t raw_read (off_t offs, size_t io_offs, size_t len, size_t *amount) + { + struct store *store = dev->store; + off_t addr = offs >> store->log2_block_size; + if (len == whole_amount) + /* Just return whatever the device does. */ + return store_read (store, addr, len, buf, amount); + else + /* This read is returning less than the whole request, so we allocate + a buffer big enough to hold everything, in case we have to + coalesce multiple reads into a single return buffer. */ + { + error_t err = ensure_buf (); + if (! err) + { + char *_req_buf = *buf + io_offs, *req_buf = _req_buf; + size_t req_len = len; + err = store_read (store, addr, len, &req_buf, &req_len); + if (! err) + { + if (req_buf != _req_buf) + /* Copy from wherever the read put it. */ + { + bcopy (req_buf, _req_buf, req_len); + vm_deallocate (mach_task_self (), + (vm_address_t)req_buf, req_len); + } + *amount = req_len; + } + } + return err; + } + } + + err = dev_rw (dev, offs, whole_amount, len, buf_read, raw_read); + if (err && allocated_buf) + vm_deallocate (mach_task_self (), (vm_address_t)*buf, whole_amount); + + return err; +} diff --git a/storeio/dev.h b/storeio/dev.h new file mode 100644 index 00000000..8ef64090 --- /dev/null +++ b/storeio/dev.h @@ -0,0 +1,101 @@ +/* store `device' I/O + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. 
+
+   Written by Miles Bader <miles@gnu.ai.mit.edu>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __DEV_H__
+#define __DEV_H__
+
+#include <mach.h>
+#include <device/device.h>
+#include <rwlock.h>
+#include <hurd/store.h>
+
+/* Information about a kernel device.  */
+struct dev
+{
+  /* The device to which we're doing io.  */
+  struct store *store;
+
+  /* A bitmask corresponding to the part of an offset that lies within a
+     device block.  */
+  unsigned block_mask;
+
+  /* Lock to arbitrate I/O through this device.  Block I/O can occur in
+     parallel, and requires only a reader-lock.
+     Non-block I/O is always serialized, and requires a writer-lock.  */
+  struct rwlock io_lock;
+
+  /* Non-block I/O is buffered through BUF.  BUF_OFFS is the device offset
+     corresponding to the start of BUF (which holds one block); if it is -1,
+     then BUF is inactive.  BUF_DIRTY is non-zero when BUF has been written
+     to and must be written back to STORE before it is discarded.  */
+  char *buf;
+  off_t buf_offs;
+  int buf_dirty;
+
+  /* The pager for this device, or 0 if there is none.
+     NOTE(review): PAGER_LOCK presumably guards PAGER; confirm in pager.c.  */
+  struct pager *pager;
+  struct mutex pager_lock;
+
+  /* The current owner of the open device.  For terminals, this affects
+     controlling terminal behavior (see term_become_ctty).  For all objects
+     this affects old-style async IO.  Negative values represent pgrps.  This
+     has nothing to do with the owner of a file (as returned by io_stat, and
+     as used for various permission checks by filesystems).  An owner of 0
+     indicates that there is no owner.  */
+  pid_t owner;
+};
+
+/* Returns a pointer to a new device structure in DEV for the device
+   NAME, with the given FLAGS.  If an error occurs, the error code is
+   returned, otherwise 0.  */
+error_t dev_open (struct store_parsed *name, int flags, struct dev **dev);
+
+/* Free DEV and any resources it consumes.  */
+void dev_close (struct dev *dev);
+
+/* Returns in MEMOBJ the port for a memory object backed by the storage on
+   DEV.  Returns 0 or the error code if an error occurred.  */
+error_t dev_get_memory_object(struct dev *dev, memory_object_t *memobj);
+
+/* Try to stop all paging activity on DEV, returning true if we were
+   successful.  If NOSYNC is true, then we won't write back any (kernel)
+   cached pages to the device.  */
+int dev_stop_paging (struct dev *dev, int nosync);
+
+/* Try and write out any pending writes to DEV.  If WAIT is true, will wait
+   for any paging activity to cease.  */
+error_t dev_sync (struct dev *dev, int wait);
+
+#ifdef MSG
+char *brep(vm_address_t buf, vm_size_t len);
+#endif
+
+/* Write LEN bytes from BUF to DEV, returning the amount actually written in
+   AMOUNT.  If successful, 0 is returned, otherwise an error code is
+   returned.  */
+error_t dev_write (struct dev *dev, off_t offs, char *buf, size_t len,
+                   size_t *amount);
+
+/* Read up to AMOUNT bytes from DEV, returned in BUF and LEN with the
+   usual mach memory result semantics.  If successful, 0 is returned,
+   otherwise an error code is returned.  */
+error_t dev_read (struct dev *dev, off_t offs, size_t amount,
+                  char **buf, size_t *len);
+
+#endif /* !__DEV_H__ */
diff --git a/storeio/io.c b/storeio/io.c
new file mode 100644
index 00000000..f8067698
--- /dev/null
+++ b/storeio/io.c
@@ -0,0 +1,329 @@
+/* The hurd io interface to storeio
+
+   Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+ + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <hurd/trivfs.h> +#include <stdio.h> +#include <fcntl.h> + +#include "open.h" +#include "dev.h" + +/* Return objects mapping the data underlying this memory object. If + the object can be read then memobjrd will be provided; if the + object can be written then memobjwr will be provided. For objects + where read data and write data are the same, these objects will be + equal, otherwise they will be disjoint. Servers are permitted to + implement io_map but not io_map_cntl. Some objects do not provide + mapping; they will set none of the ports and return an error. Such + objects can still be accessed by io_read and io_write. */ +error_t +trivfs_S_io_map (struct trivfs_protid *cred, + memory_object_t *rd_obj, mach_msg_type_name_t *rd_type, + memory_object_t *wr_obj, mach_msg_type_name_t *wr_type) +{ + if (! 
cred) + return EOPNOTSUPP; + else + { + mach_port_t memobj; + struct open *open = (struct open *)cred->po->hook; + error_t err = dev_get_memory_object (open->dev, &memobj); + + if (!err) + { + if (cred->po->openmodes & O_READ) + { + *rd_obj = memobj; + *rd_type = MACH_MSG_TYPE_MOVE_SEND; + } + else + *rd_obj = MACH_PORT_NULL; + + if (cred->po->openmodes & O_WRITE) + { + *wr_obj = memobj; + *wr_type = MACH_MSG_TYPE_MOVE_SEND; + } + else + *wr_obj = MACH_PORT_NULL; + } + + return err; + } +} + +/* Read data from an IO object. If offset if -1, read from the object + maintained file pointer. If the object is not seekable, offset is + ignored. The amount desired to be read is in AMOUNT. */ +error_t +trivfs_S_io_read (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + vm_address_t *data, mach_msg_type_number_t *data_len, + off_t offs, mach_msg_type_number_t amount) +{ + if (! cred) + return EOPNOTSUPP; + else if (!(cred->po->openmodes & O_READ)) + return EBADF; + else + return open_read ((struct open *)cred->po->hook, + offs, amount, (char **)data, data_len); +} + +/* Tell how much data can be read from the object without blocking for + a "long time" (this should be the same meaning of "long time" used + by the nonblocking flag. */ +error_t +trivfs_S_io_readable (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + mach_msg_type_number_t *amount) +{ + if (! cred) + return EOPNOTSUPP; + else if (! (cred->po->openmodes & O_READ)) + return EINVAL; + else + { + struct open *open = (struct open *)cred->po->hook; + *amount = open->dev->store->size - open->offs; + return 0; + } +} + +/* Write data to an IO object. If offset is -1, write at the object + maintained file pointer. If the object is not seekable, offset is + ignored. The amount successfully written is returned in amount. 
A + given user should not have more than one outstanding io_write on an + object at a time; servers implement congestion control by delaying + responses to io_write. Servers may drop data (returning ENOBUFS) + if they recevie more than one write when not prepared for it. */ +error_t +trivfs_S_io_write (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + vm_address_t data, mach_msg_type_number_t data_len, + off_t offs, mach_msg_type_number_t *amount) +{ + if (! cred) + return EOPNOTSUPP; + else if (!(cred->po->openmodes & O_WRITE)) + return EBADF; + else + return open_write ((struct open *)cred->po->hook, + offs, (char *)data, data_len, amount); +} + +/* Change current read/write offset */ +error_t +trivfs_S_io_seek (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + off_t offs, int whence, off_t *new_offs) +{ + if (! cred) + return EOPNOTSUPP; + else + return open_seek ((struct open *)cred->po->hook, offs, whence, new_offs); +} + +/* SELECT_TYPE is the bitwise OR of SELECT_READ, SELECT_WRITE, and SELECT_URG. + Block until one of the indicated types of i/o can be done "quickly", and + return the types that are then available. */ +error_t +trivfs_S_io_select (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + int *type) +{ + if (! cred) + return EOPNOTSUPP; + else if (((*type & SELECT_READ) && !(cred->po->openmodes & O_READ)) + || ((*type & SELECT_WRITE) && !(cred->po->openmodes & O_WRITE))) + return EBADF; + else + *type &= ~SELECT_URG; + return 0; +} + +/* Truncate file. */ +error_t +trivfs_S_file_set_size (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + off_t size) +{ + if (! cred) + return EOPNOTSUPP; + else + return 0; +} + +/* These four routines modify the O_APPEND, O_ASYNC, O_FSYNC, and + O_NONBLOCK bits for the IO object. 
In addition, io_get_openmodes + will tell you which of O_READ, O_WRITE, and O_EXEC the object can + be used for. The O_ASYNC bit affects icky async I/O; good async + I/O is done through io_async which is orthogonal to these calls. */ + +error_t +trivfs_S_io_get_openmodes (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + int *bits) +{ + if (! cred) + return EOPNOTSUPP; + else + { + *bits = cred->po->openmodes; + return 0; + } +} + +error_t +trivfs_S_io_set_all_openmodes (struct trivfs_protid *cred, + mach_port_t reply, + mach_msg_type_name_t reply_type, + int mode) +{ + if (! cred) + return EOPNOTSUPP; + else + return 0; +} + +error_t +trivfs_S_io_set_some_openmodes (struct trivfs_protid *cred, + mach_port_t reply, + mach_msg_type_name_t reply_type, + int bits) +{ + if (! cred) + return EOPNOTSUPP; + else + return 0; +} + +error_t +trivfs_S_io_clear_some_openmodes (struct trivfs_protid *cred, + mach_port_t reply, + mach_msg_type_name_t reply_type, + int bits) +{ + if (! cred) + return EOPNOTSUPP; + else + return 0; +} + +/* Get/set the owner of the IO object. For terminals, this affects + controlling terminal behavior (see term_become_ctty). For all + objects this affects old-style async IO. Negative values represent + pgrps. This has nothing to do with the owner of a file (as + returned by io_stat, and as used for various permission checks by + filesystems). An owner of 0 indicates that there is no owner. */ +error_t +trivfs_S_io_get_owner (struct trivfs_protid *cred, + mach_port_t reply, + mach_msg_type_name_t reply_type, + pid_t *owner) +{ + if (! cred) + return EOPNOTSUPP; + else + { + struct open *open = (struct open *)cred->po->hook; + *owner = open->dev->owner; + return 0; + } +} + +error_t +trivfs_S_io_mod_owner (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + pid_t owner) +{ + if (! 
cred) + return EOPNOTSUPP; + else + { + struct open *open = (struct open *)cred->po->hook; + open->dev->owner = owner; + return 0; + } +} + +/* File syncing operations; these all do the same thing, sync the underlying + device. */ + +error_t +trivfs_S_file_sync (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + int wait) +{ + if (cred) + return dev_sync (((struct open *)cred->po->hook)->dev, wait); + else + return EOPNOTSUPP; +} + +error_t +trivfs_S_file_syncfs (struct trivfs_protid *cred, + mach_port_t reply, mach_msg_type_name_t reply_type, + int wait, int dochildren) +{ + if (! cred) + return dev_sync (((struct open *)cred->po->hook)->dev, wait); + else + return EOPNOTSUPP; +} + +error_t +trivfs_S_file_get_storage_info (struct trivfs_protid *cred, + mach_port_t reply, + mach_msg_type_name_t reply_type, + mach_port_t **ports, + mach_msg_type_name_t *ports_type, + mach_msg_type_number_t *num_ports, + int **ints, mach_msg_type_number_t *num_ints, + off_t **offsets, + mach_msg_type_number_t *num_offsets, + char **data, mach_msg_type_number_t *data_len) +{ + error_t err = 0; + struct store_enc enc; + + if (! cred) + return EOPNOTSUPP; + + store_enc_init (&enc, *ports, *num_ports, *ints, *num_ints, + *offsets, *num_offsets, *data, *data_len); + + err = store_encode (((struct open *)cred->po->hook)->dev->store, &enc); + if (! err) + { + *ports = enc.ports; + *num_ports = enc.num_ports; + *ints = enc.ints; + *num_ints = enc.num_ints; + *offsets = enc.offsets; + *num_offsets = enc.num_offsets; + *data = enc.data; + *data_len = enc.data_len; + + *ports_type = MACH_MSG_TYPE_COPY_SEND; + } + + return err; +} diff --git a/storeio/open.c b/storeio/open.c new file mode 100644 index 00000000..f27dc3ab --- /dev/null +++ b/storeio/open.c @@ -0,0 +1,124 @@ +/* Per-open information for storeio + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. 
+ + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <hurd.h> +#include <stdio.h> + +#include "open.h" +#include "dev.h" + +/* Returns a new per-open structure for the device DEV in OPEN. If an error + occurs, the error-code is returned, otherwise 0. */ +error_t +open_create (struct dev *dev, struct open **open) +{ + *open = malloc (sizeof (struct open)); + if (*open == NULL) + return ENOMEM; + + (*open)->dev = dev; + (*open)->offs = 0; + mutex_init (&(*open)->lock); + + return 0; +} + +/* Free OPEN and any resources it holds. */ +void +open_free (struct open *open) +{ + free (open); +} + +/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS + (which may be ignored if the device doesn't support random access), + and returns the number of bytes written in AMOUNT. If no error occurs, + zero is returned, otherwise the error code is returned. */ +error_t +open_write (struct open *open, off_t offs, char *buf, size_t len, + vm_size_t *amount) +{ + error_t err; + if (offs < 0) + /* Use OPEN's offset. */ + { + mutex_lock (&open->lock); + err = dev_write (open->dev, open->offs, buf, len, amount); + if (! 
err) + open->offs += *amount; + mutex_unlock (&open->lock); + } + else + err = dev_write (open->dev, offs, buf, len, amount); + return err; +} + +/* Reads up to AMOUNT bytes from the device into BUF and LEN using the + standard mach out-array convention. If no error occurs, zero is returned, + otherwise the error code is returned. */ +error_t +open_read (struct open *open, off_t offs, size_t amount, + char **buf, vm_size_t *len) +{ + error_t err; + if (offs < 0) + /* Use OPEN's offset. */ + { + mutex_lock (&open->lock); + err = dev_read (open->dev, open->offs, amount, buf, len); + if (! err) + open->offs += *len; + mutex_unlock (&open->lock); + } + else + err = dev_read (open->dev, offs, amount, buf, len); + return err; +} + +/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek. + The new absolute location is returned in NEW_OFFS (and may not be the same + as OFFS). If no error occurs, zero is returned, otherwise the error code + is returned. */ +error_t +open_seek (struct open *open, off_t offs, int whence, off_t *new_offs) +{ + error_t err = 0; + + mutex_lock (&open->lock); + + switch (whence) + { + case SEEK_SET: + open->offs = offs; break; + case SEEK_CUR: + open->offs += offs; break; + case SEEK_END: + open->offs = open->dev->store->size - offs; break; + default: + err = EINVAL; + } + + if (! err) + *new_offs = open->offs; + + mutex_unlock (&open->lock); + + return err; +} diff --git a/storeio/open.h b/storeio/open.h new file mode 100644 index 00000000..efb2128d --- /dev/null +++ b/storeio/open.h @@ -0,0 +1,68 @@ +/* Per-open information for storeio + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. 
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __OPEN_H__
+#define __OPEN_H__
+
+#include "dev.h"
+
+/* ---------------------------------------------------------------- */
+
+/* A structure describing a particular i/o stream on this device.  */
+struct open
+{
+  /* The device that this is an open on.  */
+  struct dev *dev;
+
+  /* The per-open offset used for I/O operations that don't specify an
+     explicit offset.  */
+  off_t offs;
+
+  /* A lock used to control write access to OFFS.  */
+  struct mutex lock;
+};
+
+/* Returns a new per-open structure for the device DEV in OPEN.  If an error
+   occurs, the error-code is returned, otherwise 0.  */
+error_t open_create (struct dev *dev, struct open **open);
+
+/* Free OPEN and any resources it holds.  */
+void open_free (struct open *open);
+
+/* Writes up to LEN bytes from BUF to OPEN's device at device offset OFFS
+   (which may be ignored if the device doesn't support random access),
+   and returns the number of bytes written in AMOUNT.  If no error occurs,
+   zero is returned, otherwise the error code is returned.  */
+error_t open_write (struct open *open, off_t offs, char *buf, size_t len,
+                    size_t *amount);
+
+/* Reads up to AMOUNT bytes from the device into BUF and BUF_LEN using the
+   standard mach out-array convention.  If no error occurs, zero is returned,
+   otherwise the error code is returned.  */
+error_t open_read (struct open *open, off_t offs, size_t amount,
+                   char **buf, size_t *buf_len);
+
+/* Set OPEN's location to OFFS, interpreted according to WHENCE as by seek.
+ The new absolute location is returned in NEW_OFFS (and may not be the same + as OFFS). If no error occurs, zero is returned, otherwise the error code + is returned. */ +error_t open_seek (struct open *open, off_t offs, int whence, off_t *new_offs); + +#endif /* !__OPEN_H__ */ diff --git a/storeio/pager.c b/storeio/pager.c new file mode 100644 index 00000000..e723f602 --- /dev/null +++ b/storeio/pager.c @@ -0,0 +1,264 @@ +/* Paging interface for storeio devices + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <hurd.h> +#include <hurd/pager.h> +#include <assert.h> +#include <strings.h> +#include <unistd.h> + +#include "dev.h" + +/* ---------------------------------------------------------------- */ +/* Pager library callbacks; see <hurd/pager.h> for more info. */ + +/* For pager PAGER, read one page from offset PAGE. Set *BUF to be the + address of the page, and set *WRITE_LOCK if the page must be provided + read-only. The only permissable error returns are EIO, EDQUOT, and + ENOSPC. 
*/ +error_t +pager_read_page (struct user_pager_info *upi, + vm_offset_t page, vm_address_t *buf, int *writelock) +{ + error_t err; + int read; /* bytes actually read */ + int want = vm_page_size; /* bytes we want to read */ + struct dev *dev = (struct dev *)upi; + struct store *store = dev->store; + + if (page + want > store->size) + /* Read a partial page if necessary to avoid reading off the end. */ + want = store->size - page; + + err = dev_read (dev, page, want, (char **)buf, &read); + + if (!err && want < vm_page_size) + /* Zero anything we didn't read. Allocation only happens in page-size + multiples, so we know we can write there. */ + bzero ((char *)*buf + want, vm_page_size - want); + + *writelock = (store->flags & STORE_READONLY); + + if (err || read < want) + return EIO; + else + return 0; +} + +/* For pager PAGER, synchronously write one page from BUF to offset PAGE. In + addition, vm_deallocate (or equivalent) BUF. The only permissable error + returns are EIO, EDQUOT, and ENOSPC. */ +error_t +pager_write_page (struct user_pager_info *upi, + vm_offset_t page, vm_address_t buf) +{ + struct dev *dev = (struct dev *)upi; + struct store *store = dev->store; + + if (store->flags & STORE_READONLY) + return EROFS; + else + { + error_t err; + int written; + int want = vm_page_size; + + if (page + want > store->size) + /* Write a partial page if necessary to avoid reading off the end. */ + want = store->size - page; + + err = dev_write (dev, page, (char *)buf, want, &written); + + vm_deallocate (mach_task_self (), buf, vm_page_size); + + if (err || written < want) + return EIO; + else + return 0; + } +} + +/* A page should be made writable. */ +error_t +pager_unlock_page (struct user_pager_info *upi, vm_offset_t address) +{ + struct dev *dev = (struct dev *)upi; + + if (dev->store->flags & STORE_READONLY) + return EROFS; + else + return 0; +} + +/* The user must define this function. 
It should report back (in + *OFFSET and *SIZE the minimum valid address the pager will accept + and the size of the object. */ +error_t +pager_report_extent (struct user_pager_info *upi, + vm_address_t *offset, vm_size_t *size) +{ + *offset = 0; + *size = ((struct dev *)upi)->store->size; + return 0; +} + +/* This is called when a pager is being deallocated after all extant send + rights have been destroyed. */ +void +pager_clear_user_data (struct user_pager_info *upi) +{ +} + +static struct port_bucket *pager_port_bucket = 0; + +/* A top-level function for the paging thread that just services paging + requests. */ +static void +service_paging_requests (any_t arg) +{ + for (;;) + ports_manage_port_operations_multithread (pager_port_bucket, + pager_demuxer, + 1000 * 30, 1000 * 60 * 5, + 1, MACH_PORT_NULL); +} + +/* Initialize paging for this device. */ +static void +init_dev_paging () +{ + if (! pager_port_bucket) + { + static struct mutex pager_global_lock = MUTEX_INITIALIZER; + + mutex_lock (&pager_global_lock); + if (pager_port_bucket == NULL) + { + pager_port_bucket = ports_create_bucket (); + + /* Make a thread to service paging requests. */ + cthread_detach (cthread_fork ((cthread_fn_t)service_paging_requests, + (any_t)0)); + } + mutex_unlock (&pager_global_lock); + } +} + +void +pager_dropweak (struct user_pager_info *upi __attribute__ ((unused))) +{ +} + +/* Try to stop all paging activity on DEV, returning true if we were + successful. If NOSYNC is true, then we won't write back any (kernel) + cached pages to the device. */ +int +dev_stop_paging (struct dev *dev, int nosync) +{ + int success = 1; /* Initially assume success. 
*/ + + mutex_lock (&dev->pager_lock); + + if (dev->pager != NULL) + { + int num_pagers = ports_count_bucket (pager_port_bucket); + if (num_pagers > 0 && !nosync) + { + error_t block_cache (void *arg) + { + struct pager *p = arg; + pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1); + return 0; + } + error_t enable_cache (void *arg) + { + struct pager *p = arg; + pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0); + return 0; + } + + /* Loop through the pagers and turn off caching one by one, + synchronously. That should cause termination of each pager. */ + ports_bucket_iterate (pager_port_bucket, block_cache); + + /* Give it a second; the kernel doesn't actually shutdown + immediately. XXX */ + sleep (1); + + num_pagers = ports_count_bucket (pager_port_bucket); + if (num_pagers > 0) + /* Darn, there are actual honest users. Turn caching back on, + and return failure. */ + { + ports_bucket_iterate (pager_port_bucket, enable_cache); + success = 0; + } + } + + if (success && !nosync) + /* shutdown the pager on DEV. If NOSYNC is set, we don't bother, for + fear that this may result in I/O. In this case we've disabled + rpcs on the pager's ports, so this will result in hanging... What + do we do??? XXXX */ + pager_shutdown (dev->pager); + } + + if (success) + dev->pager = NULL; + + mutex_unlock (&dev->pager_lock); + + return success; +} + +/* Returns in MEMOBJ the port for a memory object backed by the storage on + DEV. Returns 0 or the error code if an error occurred. */ +error_t +dev_get_memory_object (struct dev *dev, memory_object_t *memobj) +{ + error_t err = 0; + + init_dev_paging (); + + mutex_lock (&dev->pager_lock); + + if (dev->pager == NULL) + dev->pager = + pager_create ((struct user_pager_info *)dev, pager_port_bucket, + 1, MEMORY_OBJECT_COPY_DELAY); + else + ports_port_ref (dev->pager); + + if (dev->pager == NULL) + err = ENODEV; /* XXX ??? 
*/ + else + { + *memobj = pager_get_port (dev->pager); + if (*memobj != MACH_PORT_NULL) + err = + mach_port_insert_right (mach_task_self (), + *memobj, *memobj, MACH_MSG_TYPE_MAKE_SEND); + ports_port_deref (dev->pager); /* Drop our original ref on PAGER. */ + } + + mutex_unlock (&dev->pager_lock); + + return err; +} diff --git a/storeio/storeio.c b/storeio/storeio.c new file mode 100644 index 00000000..8f189bf8 --- /dev/null +++ b/storeio/storeio.c @@ -0,0 +1,328 @@ +/* A translator for doing I/O to stores + + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <hurd.h> +#include <hurd/ports.h> +#include <hurd/pager.h> +#include <hurd/trivfs.h> +#include <hurd/fsys.h> + +#include <stdio.h> +#include <error.h> +#include <assert.h> +#include <fcntl.h> +#include <argp.h> + +#include "open.h" +#include "dev.h" + +/* The port class of our file system control pointer. */ +struct port_class *fsys_port_class; +/* The port class of the (only) root file port for the opened device. */ +struct port_class *root_port_class; + +/* A bucket to put all our ports in. */ +struct port_bucket *port_bucket; + +/* Trivfs noise. 
*/ +struct port_class *trivfs_protid_portclasses[1]; +struct port_class *trivfs_cntl_portclasses[1]; +int trivfs_protid_nportclasses = 1; +int trivfs_cntl_nportclasses = 1; + +static struct argp_option options[] = +{ + {"readonly", 'r', 0, 0, "Disallow writing"}, + {"writable", 'w', 0, 0, "Allow writing"}, + {"rdev", 'n', "ID", 0, + "The stat rdev number for this node; may be either a" + " single integer, or of the form MAJOR,MINOR"}, + {0, 0, 0, 0} +}; +static char *args_doc = 0; +static char *doc = "Translator for devices and other stores"; + +/* ---------------------------------------------------------------- */ + +/* The open store. */ +static struct dev *device = NULL; +/* And a lock to arbitrate changes to it. */ +static struct mutex device_lock; + +/* Desired store parameters specified by the user. */ +struct store_parsed *store_name; +static int readonly = 0; + +/* A unixy device number to return when the device is stat'd. */ +static int rdev = 0; + +void main (int argc, char *argv[]) +{ + error_t err; + mach_port_t bootstrap; + error_t parse_opt (int key, char *arg, struct argp_state *state) + { + switch (key) + { + case 'r': readonly = 1; break; + case 'w': readonly = 0; break; + case 'n': + { + char *start = arg, *end; + + rdev = strtoul (start, &end, 0); + if (*end == ',') + /* MAJOR,MINOR form */ + { + start = end; + rdev = (rdev << 8) + strtoul (start, &end, 0); + } + + if (end == start || *end != '\0') + { + argp_error (state, "%s: Invalid argument to --rdev", arg); + return EINVAL; + } + } + break; + case ARGP_KEY_INIT: + state->child_inputs[0] = &store_name; break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; + } + const struct argp *kids[] = { &store_argp, 0 }; + const struct argp argp = { options, parse_opt, args_doc, doc, kids }; + + argp_parse (&argp, argc, argv, 0, 0, 0); + + if (readonly) + /* Catch illegal writes at the point of open. 
*/ + trivfs_allow_open &= ~O_WRITE; + + task_get_bootstrap_port (mach_task_self (), &bootstrap); + if (bootstrap == MACH_PORT_NULL) + error (2, 0, "Must be started as a translator"); + + fsys_port_class = ports_create_class (trivfs_clean_cntl, 0); + root_port_class = ports_create_class (trivfs_clean_protid, 0); + port_bucket = ports_create_bucket (); + trivfs_protid_portclasses[0] = root_port_class; + trivfs_cntl_portclasses[0] = fsys_port_class; + + /* Reply to our parent */ + err = + trivfs_startup (bootstrap, 0, + fsys_port_class, port_bucket, + root_port_class, port_bucket, + NULL); + if (err) + error (3, err, "trivfs_startup"); + + /* Open the device only when necessary. */ + device = NULL; + mutex_init (&device_lock); + + /* Launch. */ + ports_manage_port_operations_multithread (port_bucket, trivfs_demuxer, + 30*1000, 5*60*1000, 0, 0); + + exit (0); +} + +/* Called whenever someone tries to open our node (even for a stat). We + delay opening the kernel device until this point, as we can usefully + return errors from here. */ +static error_t +check_open_hook (struct trivfs_control *trivfs_control, + uid_t *uids, u_int nuids, + gid_t *gids, u_int ngids, + int flags) +{ + error_t err = 0; + + mutex_lock (&device_lock); + if (device == NULL) + /* Try and open the device. */ + { + err = dev_open (store_name, readonly ? STORE_READONLY : 0, &device); + if (err) + device = NULL; + if (err && (flags & (O_READ|O_WRITE)) == 0) + /* If we're not opening for read or write, then just ignore the + error, as this allows stat to word correctly. 
XXX */ + err = 0; + } + mutex_unlock (&device_lock); + + return err; +} + +static error_t +open_hook (struct trivfs_peropen *peropen) +{ + struct dev *dev = device; + if (dev) + return open_create (dev, (struct open **)&peropen->hook); + else + return 0; +} + +static void +close_hook (struct trivfs_peropen *peropen) +{ + if (peropen->hook) + open_free (peropen->hook); +} + +/* ---------------------------------------------------------------- */ +/* Trivfs hooks */ + +int trivfs_fstype = FSTYPE_DEV; +int trivfs_fsid = 0; + +int trivfs_support_read = 1; +int trivfs_support_write = 1; +int trivfs_support_exec = 0; + +int trivfs_allow_open = O_READ | O_WRITE; + +void +trivfs_modify_stat (struct trivfs_protid *cred, struct stat *st) +{ + struct open *open = cred->po->hook; + + if (open) + /* An open device. */ + { + struct store *store = open->dev->store; + vm_size_t size = store->size; + + if (store->block_size > 1) + st->st_blksize = store->block_size; + + st->st_size = size; + st->st_blocks = size / 512; + } + else + /* Try and do things without an open device... */ + { + st->st_blksize = 0; + st->st_size = 0; + st->st_blocks = 0; + } + + st->st_mode &= ~S_IFMT; + st->st_mode |= S_IFCHR; + st->st_rdev = rdev; + if (readonly) + st->st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); +} + +error_t +trivfs_goaway (struct trivfs_control *fsys, int flags) +{ + error_t err; + int force = (flags & FSYS_GOAWAY_FORCE); + int nosync = (flags & FSYS_GOAWAY_NOSYNC); + + mutex_lock (&device_lock); + + if (device == NULL) + exit (0); + + /* Wait until all pending rpcs are done. */ + err = ports_inhibit_class_rpcs (root_port_class); + if (err == EINTR || (err && !force)) + { + mutex_unlock (&device_lock); + return err; + } + + if (force && nosync) + /* Exit with extreme prejudice. */ + exit (0); + + if (!force && ports_count_class (root_port_class) > 0) + /* Still users, so don't exit. */ + goto busy; + + if (! 
nosync) + /* Sync the device here, if necessary, so that closing it won't result in + any I/O (which could get hung up trying to use one of our pagers). */ + dev_sync (device, 1); + + /* devpager_shutdown may sync the pagers as side-effect (if NOSYNC is 0), + so we put that first in this test. */ + if (dev_stop_paging (device, nosync) || force) + /* Bye-bye. */ + { + if (! nosync) + /* If NOSYNC is true, we don't close DEV, as that could cause data to + be written back. */ + dev_close (device); + exit (0); + } + + busy: + /* Allow normal operations to proceed. */ + ports_enable_class (root_port_class); + ports_resume_class_rpcs (root_port_class); + mutex_unlock (&device_lock); + + /* Complain that there are still users. */ + return EBUSY; +} + +/* If this variable is set, it is called every time an open happens. + UIDS, GIDS, and FLAGS are from the open; CNTL identifies the + node being opened. This call need not check permissions on the underlying + node. If the open call should block, then return EWOULDBLOCK. Other + errors are immediately reflected to the user. If O_NONBLOCK + is not set in FLAGS and EWOULDBLOCK is returned, then call + trivfs_complete_open when all pending open requests for this + file can complete. */ +error_t (*trivfs_check_open_hook)(struct trivfs_control *trivfs_control, + uid_t *uids, u_int nuids, + gid_t *gids, u_int ngids, + int flags) + = check_open_hook; + +/* If this variable is set, it is called every time a new peropen + structure is created and initialized. */ +error_t (*trivfs_peropen_create_hook)(struct trivfs_peropen *) = open_hook; + +/* If this variable is set, it is called every time a peropen structure + is about to be destroyed. */ +void (*trivfs_peropen_destroy_hook) (struct trivfs_peropen *) = close_hook; + +/* Sync this filesystem. 
*/
/* The device is opened lazily, so it may not exist yet; in that case there
   is nothing to sync and success is reported.  Otherwise the result of
   dev_sync is returned, with WAIT passed straight through.  */
kern_return_t
trivfs_S_fsys_syncfs (struct trivfs_control *cntl,
                      mach_port_t reply, mach_msg_type_name_t replytype,
                      int wait, int dochildren)
{
  /* Read the global once so the test and the use see the same pointer.  */
  struct dev *current = device;

  return current ? dev_sync (current, wait) : 0;
}