diff options
Diffstat (limited to 'linux')
-rw-r--r-- | linux/dev/glue/block.c | 1691 | ||||
-rw-r--r-- | linux/dev/glue/kmem.c | 581 | ||||
-rw-r--r-- | linux/dev/glue/misc.c | 346 | ||||
-rw-r--r-- | linux/dev/glue/net.c | 530 |
4 files changed, 3148 insertions, 0 deletions
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c new file mode 100644 index 0000000..9d6bd6d --- /dev/null +++ b/linux/dev/glue/block.c @@ -0,0 +1,1691 @@ +/* + * Linux block driver support. + * + * Copyright (C) 1996 The University of Utah and the Computer Systems + * Laboratory at the University of Utah (CSL) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Author: Shantanu Goel, University of Utah CSL + */ + +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + */ + +/* + * linux/fs/block_dev.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * linux/fs/buffer.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <sys/types.h> +#include <machine/spl.h> +#include <mach/mach_types.h> +#include <mach/kern_return.h> +#include <mach/mig_errors.h> +#include <mach/port.h> +#include <mach/vm_param.h> +#include <mach/notify.h> + +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> + +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <device/device_types.h> +#include <device/device_port.h> +#include <device/disk_status.h> +#include "device_reply.h" + +#include <linux_emul.h> + +#define MACH_INCLUDE +#include <linux/fs.h> +#include <linux/blk.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/major.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/malloc.h> +#include <linux/hdreg.h> +#include <asm/io.h> + +extern int linux_auto_config; +extern int linux_intr_pri; +extern int linux_to_mach_error (int); + +/* This task queue is not used in Mach: just for fixing undefined symbols. */ +DECLARE_TASK_QUEUE (tq_disk); + +/* Location of VTOC in units for sectors (512 bytes). */ +#define PDLOCATION 29 + +/* Linux kernel variables. */ + +/* Temporary data allocated on the stack. */ +struct temp_data +{ + struct inode inode; + struct file file; + struct request req; + queue_head_t pages; +}; + +/* One of these exists for each + driver associated with a major number. 
*/ +struct device_struct +{ + const char *name; /* device name */ + struct file_operations *fops; /* operations vector */ + int busy:1; /* driver is being opened/closed */ + int want:1; /* someone wants to open/close driver */ + struct gendisk *gd; /* DOS partition information */ + int default_slice; /* what slice to use when none is given */ + struct disklabel **labels; /* disklabels for each DOS partition */ +}; + +/* An entry in the Mach name to Linux major number conversion table. */ +struct name_map +{ + const char *name; /* Mach name for device */ + unsigned major; /* Linux major number */ + unsigned unit; /* Linux unit number */ + int read_only; /* 1 if device is read only */ +}; + +/* Driver operation table. */ +static struct device_struct blkdevs[MAX_BLKDEV]; + +/* Driver request function table. */ +struct blk_dev_struct blk_dev[MAX_BLKDEV] = +{ + { NULL, NULL }, /* 0 no_dev */ + { NULL, NULL }, /* 1 dev mem */ + { NULL, NULL }, /* 2 dev fd */ + { NULL, NULL }, /* 3 dev ide0 or hd */ + { NULL, NULL }, /* 4 dev ttyx */ + { NULL, NULL }, /* 5 dev tty */ + { NULL, NULL }, /* 6 dev lp */ + { NULL, NULL }, /* 7 dev pipes */ + { NULL, NULL }, /* 8 dev sd */ + { NULL, NULL }, /* 9 dev st */ + { NULL, NULL }, /* 10 */ + { NULL, NULL }, /* 11 */ + { NULL, NULL }, /* 12 */ + { NULL, NULL }, /* 13 */ + { NULL, NULL }, /* 14 */ + { NULL, NULL }, /* 15 */ + { NULL, NULL }, /* 16 */ + { NULL, NULL }, /* 17 */ + { NULL, NULL }, /* 18 */ + { NULL, NULL }, /* 19 */ + { NULL, NULL }, /* 20 */ + { NULL, NULL }, /* 21 */ + { NULL, NULL } /* 22 dev ide1 */ +}; + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int *blk_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. 
+ */ +int *blksize_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi device and read by the msdos fs driver + * This might be a some uses later. + */ +int *hardsect_size[MAX_BLKDEV] = { NULL, NULL, }; + +/* This specifies how many sectors to read ahead on the disk. + This is unused in Mach. It is here to make drivers compile. */ +int read_ahead[MAX_BLKDEV] = {0, }; + +/* Use to wait on when there are no free requests. + This is unused in Mach. It is here to make drivers compile. */ +struct wait_queue *wait_for_request = NULL; + +/* Map for allocating device memory. */ +extern vm_map_t device_io_map; + +/* Initialize block drivers. */ +int +blk_dev_init () +{ +#ifdef CONFIG_BLK_DEV_IDE + ide_init (); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init (); +#else + outb_p (0xc, 0x3f2); +#endif + return 0; +} + +/* Return 1 if major number MAJOR corresponds to a disk device. */ +static inline int +disk_major (int major) +{ + return (major == IDE0_MAJOR + || major == IDE1_MAJOR + || major == IDE2_MAJOR + || major == IDE3_MAJOR + || major == SCSI_DISK_MAJOR); +} + +/* Linux kernel block support routines. */ + +/* Register a driver for major number MAJOR, + with name NAME, and operations vector FOPS. 
*/ +int +register_blkdev (unsigned major, const char *name, + struct file_operations *fops) +{ + int err = 0; + + if (major == 0) + { + for (major = MAX_BLKDEV - 1; major > 0; major--) + if (blkdevs[major].fops == NULL) + goto out; + return -LINUX_EBUSY; + } + if (major >= MAX_BLKDEV) + return -LINUX_EINVAL; + if (blkdevs[major].fops && blkdevs[major].fops != fops) + return -LINUX_EBUSY; + +out: + blkdevs[major].name = name; + blkdevs[major].fops = fops; + blkdevs[major].busy = 0; + blkdevs[major].want = 0; + blkdevs[major].gd = NULL; + blkdevs[major].default_slice = 0; + blkdevs[major].labels = NULL; + return 0; +} + +/* Unregister the driver associated with + major number MAJOR and having the name NAME. */ +int +unregister_blkdev (unsigned major, const char *name) +{ + int err; + + if (major >= MAX_BLKDEV) + return -LINUX_EINVAL; + if (! blkdevs[major].fops || strcmp (blkdevs[major].name, name)) + return -LINUX_EINVAL; + blkdevs[major].fops = NULL; + if (blkdevs[major].labels) + { + assert (blkdevs[major].gd); + kfree ((vm_offset_t) blkdevs[major].labels, + (sizeof (struct disklabel *) + * blkdevs[major].gd->max_p * blkdevs[major].gd->max_nr)); + } + return 0; +} + +void +set_blocksize (kdev_t dev, int size) +{ + extern int *blksize_size[]; + + if (! blksize_size[MAJOR (dev)]) + return; + + switch (size) + { + case 512: + case 1024: + case 2048: + case 4096: + break; + default: + panic ("Invalid blocksize passed to set_blocksize"); + break; + } + blksize_size[MAJOR (dev)][MINOR (dev)] = size; +} + +/* Allocate a buffer SIZE bytes long. */ +static void * +alloc_buffer (int size) +{ + vm_page_t m; + struct temp_data *d; + + assert (size <= PAGE_SIZE); + + if (! 
linux_auto_config) + { + while ((m = vm_page_grab ()) == 0) + VM_PAGE_WAIT (0); + d = current_thread ()->pcb->data; + assert (d); + queue_enter (&d->pages, m, vm_page_t, pageq); + return (void *) m->phys_addr; + } + return (void *) __get_free_pages (GFP_KERNEL, 0, ~0UL); +} + +/* Free buffer P which is SIZE bytes long. */ +static void +free_buffer (void *p, int size) +{ + int i; + struct temp_data *d; + vm_page_t m; + + assert (size <= PAGE_SIZE); + + if (! linux_auto_config) + { + d = current_thread ()->pcb->data; + assert (d); + queue_iterate (&d->pages, m, vm_page_t, pageq) + { + if (m->phys_addr == (vm_offset_t) p) + { + queue_remove (&d->pages, m, vm_page_t, pageq); + vm_page_lock_queues (); + vm_page_free (m); + vm_page_lock_queues (); + return; + } + } + panic ("free_buffer"); + } + free_pages ((unsigned long) p, 0); +} + +/* Allocate a buffer of SIZE bytes and + associate it with block number BLOCK of device DEV. */ +struct buffer_head * +getblk (kdev_t dev, int block, int size) +{ + struct buffer_head *bh; + static struct buffer_head bhead; + + assert (size <= PAGE_SIZE); + + if (! linux_auto_config) + bh = (struct buffer_head *) kalloc (sizeof (struct buffer_head)); + else + bh = &bhead; + if (bh) + { + memset (bh, 0, sizeof (struct buffer_head)); + bh->b_data = alloc_buffer (size); + if (! bh->b_data) + { + if (! linux_auto_config) + kfree ((vm_offset_t) bh, sizeof (struct buffer_head)); + return NULL; + } + bh->b_dev = dev; + bh->b_size = size; + bh->b_state = 1 << BH_Lock; + bh->b_blocknr = block; + } + return bh; +} + +/* Release buffer BH previously allocated by getblk. */ +void +__brelse (struct buffer_head *bh) +{ + free_buffer (bh->b_data, bh->b_size); + if (! linux_auto_config) + kfree ((vm_offset_t) bh, sizeof (*bh)); +} + +/* Allocate a buffer of SIZE bytes and fill it with data + from device DEV starting at block number BLOCK. 
*/ +struct buffer_head * +bread (kdev_t dev, int block, int size) +{ + int err; + struct buffer_head *bh; + + bh = getblk (dev, block, size); + if (bh) + { + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (! buffer_uptodate (bh)) + { + __brelse (bh); + return NULL; + } + } + return bh; +} + +/* Return the block size for device DEV in *BSIZE and + log2(block size) in *BSHIFT. */ +static void +get_block_size (kdev_t dev, int *bsize, int *bshift) +{ + int i; + + *bsize = BLOCK_SIZE; + if (blksize_size[MAJOR (dev)] + && blksize_size[MAJOR (dev)][MINOR (dev)]) + *bsize = blksize_size[MAJOR (dev)][MINOR (dev)]; + for (i = *bsize, *bshift = 0; i != 1; i >>= 1, (*bshift)++) + ; +} + +/* Enqueue request REQ on a driver's queue. */ +static inline void +enqueue_request (struct request *req) +{ + struct request *tmp; + struct blk_dev_struct *dev; + + dev = blk_dev + MAJOR (req->rq_dev); + cli (); + tmp = dev->current_request; + if (! tmp) + { + dev->current_request = req; + (*dev->request_fn) (); + sti (); + return; + } + while (tmp->next) + { + if ((IN_ORDER (tmp, req) || ! IN_ORDER (tmp, tmp->next)) + && IN_ORDER (req, tmp->next)) + break; + tmp = tmp->next; + } + req->next = tmp->next; + tmp->next = req; + if (scsi_blk_major (MAJOR (req->rq_dev))) + (*dev->request_fn) (); + sti (); +} + +/* Perform the I/O operation RW on the buffer list BH + containing NR buffers. */ +void +ll_rw_block (int rw, int nr, struct buffer_head **bh) +{ + int i, bshift, bsize; + unsigned major; + struct request *r; + static struct request req; + + major = MAJOR (bh[0]->b_dev); + assert (major < MAX_BLKDEV); + + get_block_size (bh[0]->b_dev, &bsize, &bshift); + + if (! 
linux_auto_config) + { + assert (current_thread ()->pcb->data); + r = &((struct temp_data *) current_thread ()->pcb->data)->req; + } + else + r = &req; + + for (i = 0, r->nr_sectors = 0; i < nr - 1; i++) + { + r->nr_sectors += bh[i]->b_size >> 9; + bh[i]->b_reqnext = bh[i + 1]; + } + r->nr_sectors += bh[i]->b_size >> 9; + bh[i]->b_reqnext = NULL; + + r->rq_status = RQ_ACTIVE; + r->rq_dev = bh[0]->b_dev; + r->cmd = rw; + r->errors = 0; + r->sector = bh[0]->b_blocknr << (bshift - 9); + r->current_nr_sectors = bh[0]->b_size >> 9; + r->buffer = bh[0]->b_data; + r->bh = bh[0]; + r->bhtail = bh[nr - 1]; + r->sem = NULL; + r->next = NULL; + + enqueue_request (r); +} + +#define BSIZE (1 << bshift) +#define BMASK (BSIZE - 1) + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer *BUF of size *RESID. + The device block size is given by BSHIFT. *OFF and + *RESID may be non-multiples of the block size. + *OFF, *BUF and *RESID are updated if the operation + completed successfully. */ +static int +rdwr_partial (int rw, kdev_t dev, loff_t *off, + char **buf, int *resid, int bshift) +{ + int c, err = 0, o; + long sect, nsect; + struct buffer_head bhead, *bh = &bhead; + struct gendisk *gd; + + memset (bh, 0, sizeof (struct buffer_head)); + bh->b_state = 1 << BH_Lock; + bh->b_dev = dev; + bh->b_blocknr = *off >> bshift; + bh->b_size = BSIZE; + + /* Check if this device has non even number of blocks. */ + for (gd = gendisk_head, nsect = -1; gd; gd = gd->next) + if (gd->major == MAJOR (dev)) + { + nsect = gd->part[MINOR (dev)].nr_sects; + break; + } + if (nsect > 0) + { + sect = bh->b_blocknr << (bshift - 9); + assert ((nsect - sect) > 0); + if (nsect - sect < (BSIZE >> 9)) + bh->b_size = (nsect - sect) << 9; + } + bh->b_data = alloc_buffer (bh->b_size); + if (! 
bh->b_data) + return -LINUX_ENOMEM; + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (buffer_uptodate (bh)) + { + o = *off & BMASK; + c = bh->b_size - o; + assert (*resid <= c); + if (c > *resid) + c = *resid; + if (rw == READ) + memcpy (*buf, bh->b_data + o, c); + else + { + memcpy (bh->b_data + o, *buf, c); + bh->b_state = (1 << BH_Dirty) | (1 << BH_Lock); + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + if (! buffer_uptodate (bh)) + { + err = -LINUX_EIO; + goto out; + } + } + *buf += c; + *resid -= c; + *off += c; + } + else + err = -LINUX_EIO; +out: + free_buffer (bh->b_data, bh->b_size); + return err; +} + +#define BH_Bounce 16 +#define MAX_BUF VM_MAP_COPY_PAGE_LIST_MAX + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer *BUF of size *RESID. + The device block size is given by BSHIFT. *OFF and + *RESID must be multiples of the block size. + *OFF, *BUF and *RESID are updated if the operation + completed successfully. */ +static int +rdwr_full (int rw, kdev_t dev, loff_t *off, char **buf, int *resid, int bshift) +{ + int cc, err = 0, i, j, nb, nbuf; + long blk; + struct buffer_head bhead[MAX_BUF], *bh, *bhp[MAX_BUF]; + + assert ((*off & BMASK) == 0); + assert (*resid >= bsize); + + nbuf = *resid >> bshift; + blk = *off >> bshift; + for (i = nb = 0, bh = bhead; nb < nbuf; bh++) + { + memset (bh, 0, sizeof (*bh)); + bh->b_dev = dev; + bh->b_blocknr = blk; + set_bit (BH_Lock, &bh->b_state); + if (rw == WRITE) + set_bit (BH_Dirty, &bh->b_state); + cc = PAGE_SIZE - (((int) *buf) & PAGE_MASK); + if (cc >= BSIZE && ((int) *buf & 511) == 0) + cc &= ~BMASK; + else + { + cc = PAGE_SIZE; + set_bit (BH_Bounce, &bh->b_state); + } + if (cc > ((nbuf - nb) << bshift)) + cc = (nbuf - nb) << bshift; + if (! test_bit (BH_Bounce, &bh->b_state)) + bh->b_data = (char *) pmap_extract (vm_map_pmap (device_io_map), + (((vm_offset_t) *buf) + + (nb << bshift))); + else + { + bh->b_data = alloc_buffer (cc); + if (! 
bh->b_data) + { + err = -LINUX_ENOMEM; + break; + } + if (rw == WRITE) + memcpy (bh->b_data, *buf + (nb << bshift), cc); + } + bh->b_size = cc; + bhp[i] = bh; + nb += cc >> bshift; + blk += nb; + if (++i == MAX_BUF) + break; + } + if (! err) + { + ll_rw_block (rw, i, bhp); + wait_on_buffer (bhp[i - 1]); + } + for (bh = bhead, cc = 0, j = 0; j < i; cc += bh->b_size, bh++, j++) + { + if (! err && buffer_uptodate (bh) + && rw == READ && test_bit (BH_Bounce, &bh->b_state)) + memcpy (*buf + cc, bh->b_data, bh->b_size); + else if (! err && ! buffer_uptodate (bh)) + err = -LINUX_EIO; + if (test_bit (BH_Bounce, &bh->b_state)) + free_buffer (bh->b_data, bh->b_size); + } + if (! err) + { + *buf += cc; + *resid -= cc; + *off += cc; + } + return err; +} + +/* Perform read/write operation RW on device DEV + starting at *off to/from buffer BUF of size COUNT. + *OFF is updated if the operation completed successfully. */ +static int +do_rdwr (int rw, kdev_t dev, loff_t *off, char *buf, int count) +{ + int bsize, bshift, err = 0, resid = count; + + get_block_size (dev, &bsize, &bshift); + if (*off & BMASK) + err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); + while (resid >= bsize && ! err) + err = rdwr_full (rw, dev, off, &buf, &resid, bshift); + if (! err && resid) + err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); + return err ? err : count - resid; +} + +int +block_write (struct inode *inode, struct file *filp, + const char *buf, int count) +{ + return do_rdwr (WRITE, inode->i_rdev, &filp->f_pos, (char *) buf, count); +} + +int +block_read (struct inode *inode, struct file *filp, char *buf, int count) +{ + return do_rdwr (READ, inode->i_rdev, &filp->f_pos, buf, count); +} + +/* + * This routine checks whether a removable media has been changed, + * and invalidates all buffer-cache-entries in that case. This + * is a relatively slow routine, so we have to try to minimize using + * it. Thus it is called only upon a 'mount' or 'open'. 
This + * is the best way of combining speed and utility, I think. + * People changing diskettes in the middle of an operation deserve + * to loose :-) + */ +int +check_disk_change (kdev_t dev) +{ + unsigned i; + struct file_operations * fops; + + i = MAJOR(dev); + if (i >= MAX_BLKDEV || (fops = blkdevs[i].fops) == NULL) + return 0; + if (fops->check_media_change == NULL) + return 0; + if (! (*fops->check_media_change) (dev)) + return 0; + + /* printf ("Disk change detected on device %s\n", kdevname(dev));*/ + + if (fops->revalidate) + (*fops->revalidate) (dev); + + return 1; +} + +/* Mach device interface routines. */ + +/* Mach name to Linux major/minor number mapping table. */ +static struct name_map name_to_major[] = +{ + /* IDE disks */ + { "hd0", IDE0_MAJOR, 0, 0 }, + { "hd1", IDE0_MAJOR, 1, 0 }, + { "hd2", IDE1_MAJOR, 0, 0 }, + { "hd3", IDE1_MAJOR, 1, 0 }, + { "hd4", IDE2_MAJOR, 0, 0 }, + { "hd5", IDE2_MAJOR, 1, 0 }, + { "hd6", IDE3_MAJOR, 0, 0 }, + { "hd7", IDE3_MAJOR, 1, 0 }, + + /* IDE CDROMs */ + { "wcd0", IDE0_MAJOR, 0, 1 }, + { "wcd1", IDE0_MAJOR, 1, 1 }, + { "wcd2", IDE1_MAJOR, 0, 1 }, + { "wcd3", IDE1_MAJOR, 1, 1 }, + { "wcd4", IDE2_MAJOR, 0, 1 }, + { "wcd5", IDE2_MAJOR, 1, 1 }, + { "wcd6", IDE3_MAJOR, 0, 1 }, + { "wcd7", IDE3_MAJOR, 1, 1 }, + + /* SCSI disks */ + { "sd0", SCSI_DISK_MAJOR, 0, 0 }, + { "sd1", SCSI_DISK_MAJOR, 1, 0 }, + { "sd2", SCSI_DISK_MAJOR, 2, 0 }, + { "sd3", SCSI_DISK_MAJOR, 3, 0 }, + { "sd4", SCSI_DISK_MAJOR, 4, 0 }, + { "sd5", SCSI_DISK_MAJOR, 5, 0 }, + { "sd6", SCSI_DISK_MAJOR, 6, 0 }, + { "sd7", SCSI_DISK_MAJOR, 7, 0 }, + + /* SCSI CDROMs */ + { "cd0", SCSI_CDROM_MAJOR, 0, 1 }, + { "cd1", SCSI_CDROM_MAJOR, 1, 1 }, + + /* Floppy disks */ + { "fd0", FLOPPY_MAJOR, 0, 0 }, + { "fd1", FLOPPY_MAJOR, 1, 0 }, +}; + +#define NUM_NAMES (sizeof (name_to_major) / sizeof (name_to_major[0])) + +/* One of these is associated with each open instance of a device. 
*/ +struct block_data +{ + const char *name; /* Mach name for device */ + int want:1; /* someone is waiting for I/O to complete */ + int open_count; /* number of opens */ + int iocount; /* number of pending I/O operations */ + int part; /* BSD partition number (-1 if none) */ + int flags; /* Linux file flags */ + int mode; /* Linux file mode */ + kdev_t dev; /* Linux device number */ + ipc_port_t port; /* port representing device */ + struct device_struct *ds; /* driver operation table entry */ + struct device device; /* generic device header */ + struct name_map *np; /* name to inode map */ + struct block_data *next; /* forward link */ +}; + +/* List of open devices. */ +static struct block_data *open_list; + +/* Forward declarations. */ + +extern struct device_emulation_ops linux_block_emulation_ops; + +static io_return_t device_close (void *); + +/* Return a send right for block device BD. */ +static ipc_port_t +dev_to_port (void *bd) +{ + return (bd + ? ipc_port_make_send (((struct block_data *) bd)->port) + : IP_NULL); +} + +/* Return 1 if C is a letter of the alphabet. */ +static inline int +isalpha (int c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +/* Return 1 if C is a digit. */ +static inline int +isdigit (int c) +{ + return c >= '0' && c <= '9'; +} + +/* Find the name map entry for device NAME. + Set *SLICE to be the DOS partition and + *PART the BSD/Mach partition, if any. */ +static struct name_map * +find_name (char *name, int *slice, int *part) +{ + char *p, *q; + int i, len; + struct name_map *np; + + /* Parse name into name, unit, DOS partition (slice) and partition. */ + for (*slice = 0, *part = -1, p = name; isalpha (*p); p++) + ; + if (p == name || ! isdigit (*p)) + return NULL; + do + p++; + while (isdigit (*p)); + if (*p) + { + q = p; + if (*q == 's' && isdigit (*(q + 1))) + { + q++; + do + *slice = *slice * 10 + *q++ - '0'; + while (isdigit (*q)); + if (! *q) + goto find_major; + } + if (! 
isalpha (*q) || *(q + 1)) + return NULL; + *part = *q - 'a'; + } + +find_major: + /* Convert name to major number. */ + for (i = 0, np = name_to_major; i < NUM_NAMES; i++, np++) + { + len = strlen (np->name); + if (len == (p - name) && ! strncmp (np->name, name, len)) + return np; + } + return NULL; +} + +/* Attempt to read a BSD disklabel from device DEV. */ +static struct disklabel * +read_bsd_label (kdev_t dev) +{ + int bsize, bshift; + struct buffer_head *bh; + struct disklabel *dlp, *lp = NULL; + + get_block_size (dev, &bsize, &bshift); + bh = bread (dev, LBLLOC >> (bshift - 9), bsize); + if (bh) + { + dlp = (struct disklabel *) (bh->b_data + ((LBLLOC << 9) & (bsize - 1))); + if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC) + { + lp = (struct disklabel *) kalloc (sizeof (*lp)); + assert (lp); + memcpy (lp, dlp, sizeof (*lp)); + } + __brelse (bh); + } + return lp; +} + +/* Attempt to read a VTOC from device DEV. */ +static struct disklabel * +read_vtoc (kdev_t dev) +{ + int bshift, bsize, i; + struct buffer_head *bh; + struct evtoc *evp; + struct disklabel *lp = NULL; + + get_block_size (dev, &bsize, &bshift); + bh = bread (dev, PDLOCATION >> (bshift - 9), bsize); + if (bh) + { + evp = (struct evtoc *) (bh->b_data + ((PDLOCATION << 9) & (bsize - 1))); + if (evp->sanity == VTOC_SANE) + { + lp = (struct disklabel *) kalloc (sizeof (*lp)); + assert (lp); + lp->d_npartitions = evp->nparts; + if (lp->d_npartitions > MAXPARTITIONS) + lp->d_npartitions = MAXPARTITIONS; + for (i = 0; i < lp->d_npartitions; i++) + { + lp->d_partitions[i].p_size = evp->part[i].p_size; + lp->d_partitions[i].p_offset = evp->part[i].p_start; + lp->d_partitions[i].p_fstype = FS_BSDFFS; + } + } + __brelse (bh); + } + return lp; +} + +/* Initialize BSD/Mach partition table for device + specified by NP, DS and *DEV. Check SLICE and *PART for validity. 
*/ +static kern_return_t +init_partition (struct name_map *np, kdev_t *dev, + struct device_struct *ds, int slice, int *part) +{ + int err, i, j; + struct disklabel *lp; + struct gendisk *gd = ds->gd; + struct partition *p; + struct temp_data *d = current_thread ()->pcb->data; + + if (! gd) + { + *part = -1; + return 0; + } + if (ds->labels) + goto check; + ds->labels = (struct disklabel **) kalloc (sizeof (struct disklabel *) + * gd->max_nr * gd->max_p); + if (! ds->labels) + return D_NO_MEMORY; + memset ((void *) ds->labels, 0, + sizeof (struct disklabel *) * gd->max_nr * gd->max_p); + for (i = 1; i < gd->max_p; i++) + { + d->inode.i_rdev = *dev | i; + if (gd->part[MINOR (d->inode.i_rdev)].nr_sects <= 0 + || gd->part[MINOR (d->inode.i_rdev)].start_sect < 0) + continue; + linux_intr_pri = SPL5; + d->file.f_flags = 0; + d->file.f_mode = O_RDONLY; + if (ds->fops->open && (*ds->fops->open) (&d->inode, &d->file)) + continue; + lp = read_bsd_label (d->inode.i_rdev); + if (! lp) + lp = read_vtoc (d->inode.i_rdev); + if (ds->fops->release) + (*ds->fops->release) (&d->inode, &d->file); + if (lp) + { + if (ds->default_slice == 0) + ds->default_slice = i; + for (j = 0, p = lp->d_partitions; j < lp->d_npartitions; j++, p++) + { + if (p->p_offset < 0 || p->p_size <= 0) + continue; + + /* Sanity check. */ + if (p->p_size > gd->part[MINOR (d->inode.i_rdev)].nr_sects) + p->p_size = gd->part[MINOR (d->inode.i_rdev)].nr_sects; + } + } + ds->labels[MINOR (d->inode.i_rdev)] = lp; + } + +check: + if (*part >= 0 && slice == 0) + slice = ds->default_slice; + if (*part >= 0 && slice == 0) + return D_NO_SUCH_DEVICE; + *dev = MKDEV (MAJOR (*dev), MINOR (*dev) | slice); + if (slice >= gd->max_p + || gd->part[MINOR (*dev)].start_sect < 0 + || gd->part[MINOR (*dev)].nr_sects <= 0) + return D_NO_SUCH_DEVICE; + if (*part >= 0) + { + lp = ds->labels[MINOR (*dev)]; + if (! 
lp + || *part >= lp->d_npartitions + || lp->d_partitions[*part].p_offset < 0 + || lp->d_partitions[*part].p_size <= 0) + return D_NO_SUCH_DEVICE; + } + return 0; +} + +#define DECL_DATA struct temp_data td +#define INIT_DATA() \ +{ \ + queue_init (&td.pages); \ + td.inode.i_rdev = bd->dev; \ + td.file.f_mode = bd->mode; \ + td.file.f_flags = bd->flags; \ + current_thread ()->pcb->data = &td; \ +} + +static io_return_t +device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, + dev_mode_t mode, char *name, device_t *devp) +{ + int part, slice, err; + unsigned major, minor; + kdev_t dev; + ipc_port_t notify; + struct block_data *bd = NULL, *bdp; + struct device_struct *ds; + struct gendisk *gd; + struct name_map *np; + DECL_DATA; + + np = find_name (name, &slice, &part); + if (! np) + return D_NO_SUCH_DEVICE; + major = np->major; + ds = &blkdevs[major]; + + /* Check that driver exists. */ + if (! ds->fops) + return D_NO_SUCH_DEVICE; + + /* Wait for any other open/close calls to finish. */ + ds = &blkdevs[major]; + while (ds->busy) + { + ds->want = 1; + assert_wait ((event_t) ds, FALSE); + schedule (); + } + ds->busy = 1; + + /* Compute minor number. */ + if (! ds->gd) + { + for (gd = gendisk_head; gd && gd->major != major; gd = gd->next) + ; + ds->gd = gd; + } + minor = np->unit; + gd = ds->gd; + if (gd) + minor <<= gd->minor_shift; + dev = MKDEV (major, minor); + + queue_init (&td.pages); + current_thread ()->pcb->data = &td; + + /* Check partition. */ + err = init_partition (np, &dev, ds, slice, &part); + if (err) + goto out; + + /* Initialize file structure. */ + switch (mode & (D_READ|D_WRITE)) + { + case D_WRITE: + td.file.f_mode = O_WRONLY; + break; + + case D_READ|D_WRITE: + td.file.f_mode = O_RDWR; + break; + + default: + td.file.f_mode = O_RDONLY; + break; + } + td.file.f_flags = (mode & D_NODELAY) ? O_NDELAY : 0; + + /* Check if the device is currently open. 
*/ + for (bdp = open_list; bdp; bdp = bdp->next) + if (bdp->dev == dev + && bdp->part == part + && bdp->mode == td.file.f_mode + && bdp->flags == td.file.f_flags) + { + bd = bdp; + goto out; + } + + /* Open the device. */ + if (ds->fops->open) + { + td.inode.i_rdev = dev; + linux_intr_pri = SPL5; + err = (*ds->fops->open) (&td.inode, &td.file); + if (err) + { + err = linux_to_mach_error (err); + goto out; + } + } + + /* Allocate and initialize device data. */ + bd = (struct block_data *) kalloc (sizeof (struct block_data)); + if (! bd) + { + err = D_NO_MEMORY; + goto bad; + } + bd->want = 0; + bd->open_count = 0; + bd->iocount = 0; + bd->part = part; + bd->ds = ds; + bd->device.emul_data = bd; + bd->device.emul_ops = &linux_block_emulation_ops; + bd->dev = dev; + bd->mode = td.file.f_mode; + bd->flags = td.file.f_flags; + bd->port = ipc_port_alloc_kernel (); + if (bd->port == IP_NULL) + { + err = KERN_RESOURCE_SHORTAGE; + goto bad; + } + ipc_kobject_set (bd->port, (ipc_kobject_t) &bd->device, IKOT_DEVICE); + notify = ipc_port_make_sonce (bd->port); + ip_lock (bd->port); + ipc_port_nsrequest (bd->port, 1, notify, ¬ify); + assert (notify == IP_NULL); + goto out; + +bad: + if (ds->fops->release) + (*ds->fops->release) (&td.inode, &td.file); + +out: + ds->busy = 0; + if (ds->want) + { + ds->want = 0; + thread_wakeup ((event_t) ds); + } + + if (bd && bd->open_count > 0) + { + if (err) + *devp = NULL; + else + { + *devp = &bd->device; + bd->open_count++; + } + return err; + } + + if (err) + { + if (bd) + { + if (bd->port != IP_NULL) + { + ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); + ipc_port_dealloc_kernel (bd->port); + } + kfree ((vm_offset_t) bd, sizeof (struct block_data)); + bd = NULL; + } + } + else + { + bd->open_count = 1; + bd->next = open_list; + open_list = bd; + } + + if (IP_VALID (reply_port)) + ds_device_open_reply (reply_port, reply_port_type, err, dev_to_port (bd)); + else if (! 
err) + device_close (bd); + + return MIG_NO_REPLY; +} + +static io_return_t +device_close (void *d) +{ + struct block_data *bd = d, *bdp, **prev; + struct device_struct *ds = bd->ds; + DECL_DATA; + + INIT_DATA (); + + /* Wait for any other open/close to complete. */ + while (ds->busy) + { + ds->want = 1; + assert_wait ((event_t) ds, FALSE); + schedule (); + } + ds->busy = 1; + + if (--bd->open_count == 0) + { + /* Wait for pending I/O to complete. */ + while (bd->iocount > 0) + { + bd->want = 1; + assert_wait ((event_t) bd, FALSE); + schedule (); + } + + /* Remove device from open list. */ + prev = &open_list; + bdp = open_list; + while (bdp) + { + if (bdp == bd) + { + *prev = bdp->next; + break; + } + prev = &bdp->next; + bdp = bdp->next; + } + + assert (bdp == bd); + + if (ds->fops->release) + (*ds->fops->release) (&td.inode, &td.file); + + ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); + ipc_port_dealloc_kernel (bd->port); + kfree ((vm_offset_t) bd, sizeof (struct block_data)); + } + + ds->busy = 0; + if (ds->want) + { + ds->want = 0; + thread_wakeup ((event_t) ds); + } + return D_SUCCESS; +} + +#define MAX_COPY (VM_MAP_COPY_PAGE_LIST_MAX << PAGE_SHIFT) + +/* Check block BN and size COUNT for I/O validity + to from device BD. Set *OFF to the byte offset + where I/O is to begin and return the size of transfer. 
*/ +static int +check_limit (struct block_data *bd, loff_t *off, long bn, int count) +{ + int major, minor; + long maxsz, sz; + struct disklabel *lp = NULL; + + if (count <= 0) + return count; + + major = MAJOR (bd->dev); + minor = MINOR (bd->dev); + + if (bd->ds->gd) + { + if (bd->part >= 0) + { + assert (bd->ds->labels); + assert (bd->ds->labels[minor]); + lp = bd->ds->labels[minor]; + maxsz = lp->d_partitions[bd->part].p_size; + } + else + maxsz = bd->ds->gd->part[minor].nr_sects; + } + else + { + assert (blk_size[major]); + maxsz = blk_size[major][minor] << (BLOCK_SIZE_BITS - 9); + } + assert (maxsz > 0); + sz = maxsz - bn; + if (sz <= 0) + return sz; + if (sz < ((count + 511) >> 9)) + count = sz << 9; + if (lp) + bn += (lp->d_partitions[bd->part].p_offset + - bd->ds->gd->part[minor].start_sect); + *off = (loff_t) bn << 9; + bd->iocount++; + return count; +} + +static io_return_t +device_write (void *d, ipc_port_t reply_port, + mach_msg_type_name_t reply_port_type, dev_mode_t mode, + recnum_t bn, io_buf_ptr_t data, unsigned int orig_count, + int *bytes_written) +{ + int resid, amt, i; + int count = (int) orig_count; + io_return_t err = 0; + vm_map_copy_t copy; + vm_offset_t addr, uaddr; + vm_size_t len, size; + struct block_data *bd = d; + DECL_DATA; + + INIT_DATA (); + + *bytes_written = 0; + + if (bd->mode == O_RDONLY) + return D_INVALID_OPERATION; + if (! bd->ds->fops->write) + return D_READ_ONLY; + count = check_limit (bd, &td.file.f_pos, bn, count); + if (count < 0) + return D_INVALID_SIZE; + if (count == 0) + { + vm_map_copy_discard (copy); + return 0; + } + + resid = count; + copy = (vm_map_copy_t) data; + uaddr = copy->offset; + + /* Allocate a kernel buffer. 
*/ + size = round_page (uaddr + count) - trunc_page (uaddr); + if (size > MAX_COPY) + size = MAX_COPY; + addr = vm_map_min (device_io_map); + err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, + NULL, 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); + if (err) + { + vm_map_copy_discard (copy); + goto out; + } + + /* Determine size of I/O this time around. */ + len = size - (uaddr & PAGE_MASK); + if (len > resid) + len = resid; + + while (1) + { + /* Map user pages. */ + for (i = 0; i < copy->cpy_npages; i++) + pmap_enter (vm_map_pmap (device_io_map), + addr + (i << PAGE_SHIFT), + copy->cpy_page_list[i]->phys_addr, + VM_PROT_READ|VM_PROT_WRITE, TRUE); + + /* Do the write. */ + amt = (*bd->ds->fops->write) (&td.inode, &td.file, + (char *) addr + (uaddr & PAGE_MASK), len); + + /* Unmap pages and deallocate copy. */ + pmap_remove (vm_map_pmap (device_io_map), + addr, addr + (copy->cpy_npages << PAGE_SHIFT)); + vm_map_copy_discard (copy); + + /* Check result of write. */ + if (amt > 0) + { + resid -= amt; + if (resid == 0) + break; + uaddr += amt; + } + else + { + if (amt < 0) + err = linux_to_mach_error (amt); + break; + } + + /* Determine size of I/O this time around and copy in pages. */ + len = round_page (uaddr + resid) - trunc_page (uaddr); + if (len > MAX_COPY) + len = MAX_COPY; + len -= uaddr & PAGE_MASK; + if (len > resid) + len = resid; + err = vm_map_copyin_page_list (current_map (), uaddr, len, + FALSE, FALSE, ©, FALSE); + if (err) + break; + } + + /* Delete kernel buffer. 
*/ + vm_map_remove (device_io_map, addr, addr + size); + +out: + if (--bd->iocount == 0 && bd->want) + { + bd->want = 0; + thread_wakeup ((event_t) bd); + } + if (IP_VALID (reply_port)) + ds_device_write_reply (reply_port, reply_port_type, err, count - resid); + return MIG_NO_REPLY; +} + +static io_return_t +device_read (void *d, ipc_port_t reply_port, + mach_msg_type_name_t reply_port_type, dev_mode_t mode, + recnum_t bn, int count, io_buf_ptr_t *data, + unsigned *bytes_read) +{ + boolean_t dirty; + int resid, amt; + io_return_t err = 0; + queue_head_t pages; + vm_map_copy_t copy; + vm_offset_t addr, offset, alloc_offset, o; + vm_object_t object; + vm_page_t m; + vm_size_t len, size; + struct block_data *bd = d; + DECL_DATA; + + INIT_DATA (); + + *data = 0; + *bytes_read = 0; + + if (! bd->ds->fops->read) + return D_INVALID_OPERATION; + count = check_limit (bd, &td.file.f_pos, bn, count); + if (count < 0) + return D_INVALID_SIZE; + if (count == 0) + return 0; + + /* Allocate an object to hold the data. */ + size = round_page (count); + object = vm_object_allocate (size); + if (! object) + { + err = D_NO_MEMORY; + goto out; + } + alloc_offset = offset = 0; + resid = count; + + /* Allocate a kernel buffer. */ + addr = vm_map_min (device_io_map); + if (size > MAX_COPY) + size = MAX_COPY; + err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, NULL, + 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); + if (err) + goto out; + + queue_init (&pages); + + while (resid) + { + /* Determine size of I/O this time around. */ + len = round_page (offset + resid) - trunc_page (offset); + if (len > MAX_COPY) + len = MAX_COPY; + + /* Map any pages left from previous operation. 
*/ + o = trunc_page (offset); + queue_iterate (&pages, m, vm_page_t, pageq) + { + pmap_enter (vm_map_pmap (device_io_map), + addr + o - trunc_page (offset), + m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); + o += PAGE_SIZE; + } + assert (o == alloc_offset); + + /* Allocate and map pages. */ + while (alloc_offset < trunc_page (offset) + len) + { + while ((m = vm_page_grab ()) == 0) + VM_PAGE_WAIT (0); + assert (! m->active && ! m->inactive); + m->busy = TRUE; + queue_enter (&pages, m, vm_page_t, pageq); + pmap_enter (vm_map_pmap (device_io_map), + addr + alloc_offset - trunc_page (offset), + m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); + alloc_offset += PAGE_SIZE; + } + + /* Do the read. */ + amt = len - (offset & PAGE_MASK); + if (amt > resid) + amt = resid; + amt = (*bd->ds->fops->read) (&td.inode, &td.file, + (char *) addr + (offset & PAGE_MASK), amt); + + /* Compute number of pages to insert in object. */ + o = trunc_page (offset); + if (amt > 0) + { + dirty = TRUE; + resid -= amt; + if (resid == 0) + { + /* Zero any unused space. */ + if (offset + amt < o + len) + memset ((void *) (addr + offset - o + amt), + 0, o + len - offset - amt); + offset = o + len; + } + else + offset += amt; + } + else + { + dirty = FALSE; + offset = o + len; + } + + /* Unmap pages and add them to the object. */ + pmap_remove (vm_map_pmap (device_io_map), addr, addr + len); + vm_object_lock (object); + while (o < trunc_page (offset)) + { + m = (vm_page_t) queue_first (&pages); + assert (! queue_end (&pages, (queue_entry_t) m)); + queue_remove (&pages, m, vm_page_t, pageq); + assert (m->busy); + vm_page_lock_queues (); + if (dirty) + { + PAGE_WAKEUP_DONE (m); + m->dirty = TRUE; + vm_page_insert (m, object, o); + } + else + vm_page_free (m); + vm_page_unlock_queues (); + o += PAGE_SIZE; + } + vm_object_unlock (object); + if (amt <= 0) + { + if (amt < 0) + err = linux_to_mach_error (amt); + break; + } + } + + /* Delete kernel buffer. 
*/ + vm_map_remove (device_io_map, addr, addr + size); + + assert (queue_empty (&pages)); + +out: + if (! err) + err = vm_map_copyin_object (object, 0, round_page (count), ©); + if (! err) + { + *data = (io_buf_ptr_t) copy; + *bytes_read = count - resid; + } + else + vm_object_deallocate (object); + if (--bd->iocount == 0 && bd->want) + { + bd->want = 0; + thread_wakeup ((event_t) bd); + } + return err; +} + +static io_return_t +device_get_status (void *d, dev_flavor_t flavor, dev_status_t status, + mach_msg_type_number_t *status_count) +{ + struct block_data *bd = d; + + switch (flavor) + { + case DEV_GET_SIZE: + if (*status_count != DEV_GET_SIZE_COUNT) + return D_INVALID_SIZE; + if (disk_major (MAJOR (bd->dev))) + { + assert (bd->ds->gd); + + if (bd->part >= 0) + { + struct disklabel *lp; + + assert (bd->ds->labels); + lp = bd->ds->labels[MINOR (bd->dev)]; + assert (lp); + (status[DEV_GET_SIZE_DEVICE_SIZE] + = lp->d_partitions[bd->part].p_size << 9); + } + else + (status[DEV_GET_SIZE_DEVICE_SIZE] + = bd->ds->gd->part[MINOR (bd->dev)].nr_sects << 9); + } + else + { + assert (blk_size[MAJOR (bd->dev)]); + (status[DEV_GET_SIZE_DEVICE_SIZE] + = (blk_size[MAJOR (bd->dev)][MINOR (bd->dev)] + << BLOCK_SIZE_BITS)); + } + /* It would be nice to return the block size as reported by + the driver, but a lot of user level code assumes the sector + size to be 512. */ + status[DEV_GET_SIZE_RECORD_SIZE] = 512; + break; + + case V_GETPARMS: + if (*status_count < (sizeof (struct disk_parms) / sizeof (int))) + return D_INVALID_OPERATION; + else + { + struct disk_parms *dp = status; + struct hd_geometry hg; + DECL_DATA; + + INIT_DATA(); + + if ((*bd->ds->fops->ioctl) (&td.inode, &td.file, + HDIO_GETGEO, &hg)) + return D_INVALID_OPERATION; + + dp->dp_type = DPT_WINI; /* XXX: It may be a floppy... 
*/ + dp->dp_heads = hg.heads; + dp->dp_cyls = hg.cylinders; + dp->dp_sectors = hg.sectors; + dp->dp_dosheads = hg.heads; + dp->dp_doscyls = hg.cylinders; + dp->dp_dossectors = hg.sectors; + dp->dp_secsiz = 512; /* XXX */ + dp->dp_ptag = 0; + dp->dp_pflag = 0; + + /* XXX */ + dp->dp_pstartsec = -1; + dp->dp_pnumsec = -1; + + *status_count = sizeof (struct disk_parms) / sizeof (int); + } + + break; + + default: + return D_INVALID_OPERATION; + } + + return D_SUCCESS; +} + +struct device_emulation_ops linux_block_emulation_ops = +{ + NULL, + NULL, + dev_to_port, + device_open, + device_close, + device_write, + NULL, + device_read, + NULL, + NULL, + device_get_status, + NULL, + NULL, + NULL, + NULL, + NULL +}; diff --git a/linux/dev/glue/kmem.c b/linux/dev/glue/kmem.c new file mode 100644 index 0000000..3a9d260 --- /dev/null +++ b/linux/dev/glue/kmem.c @@ -0,0 +1,581 @@ +/* + * Linux memory allocation. + * + * Copyright (C) 1996 The University of Utah and the Computer Systems + * Laboratory at the University of Utah (CSL) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Author: Shantanu Goel, University of Utah CSL + * + */ + +#include <sys/types.h> + +#include <mach/mach_types.h> +#include <mach/vm_param.h> + +#include <kern/assert.h> +#include <kern/kalloc.h> + +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#define MACH_INCLUDE +#include <linux/sched.h> +#include <linux/malloc.h> +#include <linux/delay.h> + +#include <asm/system.h> + +extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); +extern int printf (const char *, ...); + +/* Amount of memory to reserve for Linux memory allocator. + We reserve 64K chunks to stay within DMA limits. + Increase MEM_CHUNKS if the kernel is running out of memory. */ +#define MEM_CHUNK_SIZE (64 * 1024) +#define MEM_CHUNKS 7 + +/* Mininum amount that linux_kmalloc will allocate. */ +#define MIN_ALLOC 12 + +#ifndef NBPW +#define NBPW 32 +#endif + +/* Memory block header. */ +struct blkhdr +{ + unsigned short free; /* 1 if block is free */ + unsigned short size; /* size of block */ +}; + +/* This structure heads a page allocated by linux_kmalloc. */ +struct pagehdr +{ + unsigned size; /* size (multiple of PAGE_SIZE) */ + struct pagehdr *next; /* next header in list */ +}; + +/* This structure describes a memory chunk. */ +struct chunkhdr +{ + unsigned long start; /* start address */ + unsigned long end; /* end address */ + unsigned long bitmap; /* busy/free bitmap of pages */ +}; + +/* Chunks from which pages are allocated. */ +static struct chunkhdr pages_free[MEM_CHUNKS]; + +/* Memory list maintained by linux_kmalloc. */ +static struct pagehdr *memlist; + +/* Some statistics. */ +int num_block_coalesce = 0; +int num_page_collect = 0; +int linux_mem_avail; + +/* Initialize the Linux memory allocator. */ +void +linux_kmem_init () +{ + int i, j; + vm_page_t p, pages; + + for (i = 0; i < MEM_CHUNKS; i++) + { + /* Allocate memory. 
*/ + pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE, + 16 * 1024 * 1024, + 0xffff, &pages); + + assert (pages_free[i].start); + assert ((pages_free[i].start & 0xffff) == 0); + + /* Sanity check: ensure pages are contiguous and within DMA limits. */ + for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE) + { + assert (p->phys_addr < 16 * 1024 * 1024); + assert (p->phys_addr + PAGE_SIZE + == ((vm_page_t) p->pageq.next)->phys_addr); + + p = (vm_page_t) p->pageq.next; + } + + pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE; + + /* Initialize free page bitmap. */ + pages_free[i].bitmap = 0; + j = MEM_CHUNK_SIZE >> PAGE_SHIFT; + while (--j >= 0) + pages_free[i].bitmap |= 1 << j; + } + + linux_mem_avail = (MEM_CHUNKS * MEM_CHUNK_SIZE) >> PAGE_SHIFT; +} + +/* Return the number by which the page size should be + shifted such that the resulting value is >= SIZE. */ +static unsigned long +get_page_order (int size) +{ + unsigned long order; + + for (order = 0; (PAGE_SIZE << order) < size; order++) + ; + return order; +} + +#ifdef LINUX_DEV_DEBUG +static void +check_page_list (int line) +{ + unsigned size; + struct pagehdr *ph; + struct blkhdr *bh; + + for (ph = memlist; ph; ph = ph->next) + { + if ((int) ph & PAGE_MASK) + panic ("%s:%d: page header not aligned", __FILE__, line); + + size = 0; + bh = (struct blkhdr *) (ph + 1); + while (bh < (struct blkhdr *) ((void *) ph + ph->size)) + { + size += bh->size + sizeof (struct blkhdr); + bh = (void *) (bh + 1) + bh->size; + } + + if (size + sizeof (struct pagehdr) != ph->size) + panic ("%s:%d: memory list destroyed", __FILE__, line); + } +} +#else +#define check_page_list(line) +#endif + +/* Merge adjacent free blocks in the memory list. 
*/ +static void +coalesce_blocks () +{ + struct pagehdr *ph; + struct blkhdr *bh, *bhp, *ebh; + + num_block_coalesce++; + + for (ph = memlist; ph; ph = ph->next) + { + bh = (struct blkhdr *) (ph + 1); + ebh = (struct blkhdr *) ((void *) ph + ph->size); + while (1) + { + /* Skip busy blocks. */ + while (bh < ebh && !bh->free) + bh = (struct blkhdr *) ((void *) (bh + 1) + bh->size); + if (bh == ebh) + break; + + /* Merge adjacent free blocks. */ + while (1) + { + bhp = (struct blkhdr *) ((void *) (bh + 1) + bh->size); + if (bhp == ebh) + { + bh = bhp; + break; + } + if (!bhp->free) + { + bh = (struct blkhdr *) ((void *) (bhp + 1) + bhp->size); + break; + } + bh->size += bhp->size + sizeof (struct blkhdr); + } + } + } +} + +/* Allocate SIZE bytes of memory. + The PRIORITY parameter specifies various flags + such as DMA, atomicity, etc. It is not used by Mach. */ +void * +linux_kmalloc (unsigned int size, int priority) +{ + int order, coalesced = 0; + unsigned flags; + struct pagehdr *ph; + struct blkhdr *bh, *new_bh; + + if (size < MIN_ALLOC) + size = MIN_ALLOC; + else + size = (size + sizeof (int) - 1) & ~(sizeof (int) - 1); + + assert (size <= (MEM_CHUNK_SIZE + - sizeof (struct pagehdr) + - sizeof (struct blkhdr))); + + save_flags (flags); + cli (); + +again: + check_page_list (__LINE__); + + /* Walk the page list and find the first free block with size + greater than or equal to the one required. */ + for (ph = memlist; ph; ph = ph->next) + { + bh = (struct blkhdr *) (ph + 1); + while (bh < (struct blkhdr *) ((void *) ph + ph->size)) + { + if (bh->free && bh->size >= size) + { + bh->free = 0; + if (bh->size - size >= MIN_ALLOC + sizeof (struct blkhdr)) + { + /* Split the current block and create a new free block. 
*/ + new_bh = (void *) (bh + 1) + size; + new_bh->free = 1; + new_bh->size = bh->size - size - sizeof (struct blkhdr); + bh->size = size; + } + + check_page_list (__LINE__); + + restore_flags (flags); + return bh + 1; + } + bh = (void *) (bh + 1) + bh->size; + } + } + + check_page_list (__LINE__); + + /* Allocation failed; coalesce free blocks and try again. */ + if (!coalesced) + { + coalesce_blocks (); + coalesced = 1; + goto again; + } + + /* Allocate more pages. */ + order = get_page_order (size + + sizeof (struct pagehdr) + + sizeof (struct blkhdr)); + ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, ~0UL); + if (!ph) + { + restore_flags (flags); + return NULL; + } + + ph->size = PAGE_SIZE << order; + ph->next = memlist; + memlist = ph; + bh = (struct blkhdr *) (ph + 1); + bh->free = 0; + bh->size = ph->size - sizeof (struct pagehdr) - sizeof (struct blkhdr); + if (bh->size - size >= MIN_ALLOC + sizeof (struct blkhdr)) + { + new_bh = (void *) (bh + 1) + size; + new_bh->free = 1; + new_bh->size = bh->size - size - sizeof (struct blkhdr); + bh->size = size; + } + + check_page_list (__LINE__); + + restore_flags (flags); + return bh + 1; +} + +/* Free memory P previously allocated by linux_kmalloc. */ +void +linux_kfree (void *p) +{ + unsigned flags; + struct blkhdr *bh; + struct pagehdr *ph; + + assert (((int) p & (sizeof (int) - 1)) == 0); + + save_flags (flags); + cli (); + + check_page_list (__LINE__); + + for (ph = memlist; ph; ph = ph->next) + if (p >= (void *) ph && p < (void *) ph + ph->size) + break; + + assert (ph); + + bh = (struct blkhdr *) p - 1; + + assert (!bh->free); + assert (bh->size >= MIN_ALLOC); + assert ((bh->size & (sizeof (int) - 1)) == 0); + + bh->free = 1; + + check_page_list (__LINE__); + + restore_flags (flags); +} + +/* Free any pages that are not in use. + Called by __get_free_pages when pages are running low. 
*/ +static void +collect_kmalloc_pages () +{ + struct blkhdr *bh; + struct pagehdr *ph, **prev_ph; + + check_page_list (__LINE__); + + coalesce_blocks (); + + check_page_list (__LINE__); + + ph = memlist; + prev_ph = &memlist; + while (ph) + { + bh = (struct blkhdr *) (ph + 1); + if (bh->free && (void *) (bh + 1) + bh->size == (void *) ph + ph->size) + { + *prev_ph = ph->next; + free_pages ((unsigned long) ph, get_page_order (ph->size)); + ph = *prev_ph; + } + else + { + prev_ph = &ph->next; + ph = ph->next; + } + } + + check_page_list (__LINE__); +} + +/* Allocate ORDER + 1 number of physically contiguous pages. + PRIORITY and DMA are not used in Mach. + + XXX: This needs to be dynamic. To do that we need to make + the Mach page manipulation routines interrupt safe and they + must provide machine dependant hooks. */ +unsigned long +__get_free_pages (int priority, unsigned long order, int dma) +{ + int i, pages_collected = 0; + unsigned flags, bits, off, j, len; + + assert ((PAGE_SIZE << order) <= MEM_CHUNK_SIZE); + + /* Construct bitmap of contiguous pages. */ + bits = 0; + j = 0; + len = 0; + while (len < (PAGE_SIZE << order)) + { + bits |= 1 << j++; + len += PAGE_SIZE; + } + + save_flags (flags); + cli (); +again: + + /* Search each chunk for the required number of contiguous pages. */ + for (i = 0; i < MEM_CHUNKS; i++) + { + off = 0; + j = bits; + while (MEM_CHUNK_SIZE - off >= (PAGE_SIZE << order)) + { + if ((pages_free[i].bitmap & j) == j) + { + pages_free[i].bitmap &= ~j; + linux_mem_avail -= order + 1; + restore_flags (flags); + return pages_free[i].start + off; + } + j <<= 1; + off += PAGE_SIZE; + } + } + + /* Allocation failed; collect kmalloc and buffer pages + and try again. 
*/ + if (!pages_collected) + { + num_page_collect++; + collect_kmalloc_pages (); + pages_collected = 1; + goto again; + } + + printf ("%s:%d: __get_free_pages: ran out of pages\n", __FILE__, __LINE__); + + restore_flags (flags); + return 0; +} + +/* Free ORDER + 1 number of physically + contiguous pages starting at address ADDR. */ +void +free_pages (unsigned long addr, unsigned long order) +{ + int i; + unsigned flags, bits, len, j; + + assert ((addr & PAGE_MASK) == 0); + + for (i = 0; i < MEM_CHUNKS; i++) + if (addr >= pages_free[i].start && addr < pages_free[i].end) + break; + + assert (i < MEM_CHUNKS); + + /* Contruct bitmap of contiguous pages. */ + len = 0; + j = 0; + bits = 0; + while (len < (PAGE_SIZE << order)) + { + bits |= 1 << j++; + len += PAGE_SIZE; + } + bits <<= (addr - pages_free[i].start) >> PAGE_SHIFT; + + save_flags (flags); + cli (); + + assert ((pages_free[i].bitmap & bits) == 0); + + pages_free[i].bitmap |= bits; + linux_mem_avail += order + 1; + restore_flags (flags); +} + + +/* vmalloc management routines. 
*/ +struct vmalloc_struct +{ + struct vmalloc_struct *prev; + struct vmalloc_struct *next; + vm_offset_t start; + vm_size_t size; +}; + +static struct vmalloc_struct +vmalloc_list = { &vmalloc_list, &vmalloc_list, 0, 0 }; + +static inline void +vmalloc_list_insert (vm_offset_t start, vm_size_t size) +{ + struct vmalloc_struct *p; + + p = (struct vmalloc_struct *) kalloc (sizeof (struct vmalloc_struct)); + if (p == NULL) + panic ("kernel memory is exhausted"); + + p->prev = vmalloc_list.prev; + p->next = &vmalloc_list; + vmalloc_list.prev->next = p; + vmalloc_list.prev = p; + + p->start = start; + p->size = size; +} + +static struct vmalloc_struct * +vmalloc_list_lookup (vm_offset_t start) +{ + struct vmalloc_struct *p; + + for (p = vmalloc_list.next; p != &vmalloc_list; p = p->next) + { + if (p->start == start) + return p; + } + + return NULL; +} + +static inline void +vmalloc_list_remove (struct vmalloc_struct *p) +{ + p->next->prev = p->prev; + p->prev->next = p->next; + + kfree (p, sizeof (struct vmalloc_struct)); +} + +/* Allocate SIZE bytes of memory. The pages need not be contiguous. */ +void * +vmalloc (unsigned long size) +{ + kern_return_t ret; + vm_offset_t addr; + + ret = kmem_alloc_wired (kernel_map, &addr, round_page (size)); + if (ret != KERN_SUCCESS) + return NULL; + + vmalloc_list_insert (addr, round_page (size)); + return (void *) addr; +} + +/* Free vmalloc'ed and vremap'ed virtual address space. */ +void +vfree (void *addr) +{ + struct vmalloc_struct *p; + + p = vmalloc_list_lookup ((vm_offset_t) addr); + if (p) + panic ("vmalloc_list_lookup failure"); + + kmem_free (kernel_map, addr, p->size); + vmalloc_list_remove (p); +} + +/* XXX: Quick hacking. */ +/* Remap physical address into virtual address. 
*/ +void * +vremap (unsigned long offset, unsigned long size) +{ + extern vm_offset_t pmap_map_bd (register vm_offset_t virt, + register vm_offset_t start, + register vm_offset_t end, + vm_prot_t prot); + vm_offset_t addr; + kern_return_t ret; + + ret = kmem_alloc_wired (kernel_map, &addr, round_page (size)); + if (ret != KERN_SUCCESS) + return NULL; + + (void) pmap_map_bd (addr, offset, offset + round_page (size), + VM_PROT_READ | VM_PROT_WRITE); + + vmalloc_list_insert (addr, round_page (size)); + return (void *) addr; +} diff --git a/linux/dev/glue/misc.c b/linux/dev/glue/misc.c new file mode 100644 index 0000000..10a9128 --- /dev/null +++ b/linux/dev/glue/misc.c @@ -0,0 +1,346 @@ +/* + * Miscellaneous routines and data for Linux emulation. + * + * Copyright (C) 1996 The University of Utah and the Computer Systems + * Laboratory at the University of Utah (CSL) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Shantanu Goel, University of Utah CSL + */ + +/* + * linux/fs/proc/scsi.c + * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de + * + * The original version was derived from linux/fs/proc/net.c, + * which is Copyright (C) 1991, 1992 Linus Torvalds. + * Much has been rewritten, but some of the code still remains. 
 *
 *  /proc/scsi directory handling functions
 *
 *  last change: 95/07/04
 *
 *  Initial version: March '95
 *  95/05/15 Added subdirectories for each driver and show every
 *	     registered HBA as a single file.
 *  95/05/30 Added rudimentary write support for parameter passing
 *  95/07/04 Fixed bugs in directory handling
 *  95/09/13 Update to support the new proc-dir tree
 *
 *  TODO: Improve support to write to the driver files
 *	  Add some more comments
 */

/*
 * linux/fs/buffer.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <sys/types.h>
#include <mach/vm_param.h>
#include <kern/thread.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <device/device_types.h>

#define MACH_INCLUDE
#include <linux/types.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/blk.h>
#include <linux/proc_fs.h>
#include <linux/kernel_stat.h>

extern boolean_t vm_map_lookup_entry (register vm_map_t, register vm_offset_t,
				      vm_map_entry_t *);
extern int printf (const char *, ...);

/* Hook filled in by the SCSI code so /proc-style info requests can be
   dispatched to it; NULL when no SCSI driver is registered.  */
int (*dispatch_scsi_info_ptr) (int ino, char *buffer, char **start,
			       off_t offset, int length, int inout) = 0;

/* Linux kernel statistics structure, referenced by the drivers.  */
struct kernel_stat kstat;

/* Map a Linux driver return value (0 or a negated LINUX_E* errno)
   to the closest Mach device error code.  Unknown codes are logged
   and collapsed to D_IO_ERROR.  Note EFAULT and EINVAL both map to
   D_INVALID_SIZE -- there is no closer Mach equivalent.  */
int
linux_to_mach_error (int err)
{
  switch (err)
    {
    case 0:
      return D_SUCCESS;

    case -LINUX_EPERM:
      return D_INVALID_OPERATION;

    case -LINUX_EIO:
      return D_IO_ERROR;

    case -LINUX_ENXIO:
      return D_NO_SUCH_DEVICE;

    case -LINUX_EACCES:
      return D_INVALID_OPERATION;

    case -LINUX_EFAULT:
      return D_INVALID_SIZE;

    case -LINUX_EBUSY:
      return D_ALREADY_OPEN;

    case -LINUX_EINVAL:
      return D_INVALID_SIZE;

    case -LINUX_EROFS:
      return D_READ_ONLY;

    case -LINUX_EWOULDBLOCK:
      return D_WOULD_BLOCK;

    case -LINUX_ENOMEM:
      return D_NO_MEMORY;

    default:
      printf ("linux_to_mach_error: unknown code %d\n", err);
      return D_IO_ERROR;
    }
}

/* Return nonzero if the current thread's last wait ended for a reason
   other than a normal wakeup (e.g. interruption) -- the Linux drivers
   use this as their "signal pending" test.  */
int
issig ()
{
  return current_thread ()->wait_result != THREAD_AWAKENED;
}

+int +block_fsync (struct inode *inode, struct file *filp) +{ + return 0; +} + +int +verify_area (int rw, const void *p, unsigned long size) +{ + vm_prot_t prot = (rw == VERIFY_WRITE) ? VM_PROT_WRITE : VM_PROT_READ; + vm_offset_t addr = trunc_page ((vm_offset_t) p); + vm_size_t len = round_page ((vm_size_t) size); + vm_map_entry_t entry; + + vm_map_lock_read (current_map ()); + + while (1) + { + if (!vm_map_lookup_entry (current_map (), addr, &entry) + || (entry->protection & prot) != prot) + { + vm_map_unlock_read (current_map ()); + return -LINUX_EFAULT; + } + if (entry->vme_end - entry->vme_start >= len) + break; + len -= entry->vme_end - entry->vme_start; + addr += entry->vme_end - entry->vme_start; + } + + vm_map_unlock_read (current_map ()); + return 0; +} + +/* + * Print device name (in decimal, hexadecimal or symbolic) - + * at present hexadecimal only. + * Note: returns pointer to static data! + */ +char * +kdevname (kdev_t dev) +{ + static char buffer[32]; + linux_sprintf (buffer, "%02x:%02x", MAJOR (dev), MINOR (dev)); + return buffer; +} + +/* RO fail safe mechanism */ + +static long ro_bits[MAX_BLKDEV][8]; + +int +is_read_only (kdev_t dev) +{ + int minor, major; + + major = MAJOR (dev); + minor = MINOR (dev); + if (major < 0 || major >= MAX_BLKDEV) + return 0; + return ro_bits[major][minor >> 5] & (1 << (minor & 31)); +} + +void +set_device_ro (kdev_t dev, int flag) +{ + int minor, major; + + major = MAJOR (dev); + minor = MINOR (dev); + if (major < 0 || major >= MAX_BLKDEV) + return; + if (flag) + ro_bits[major][minor >> 5] |= 1 << (minor & 31); + else + ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); +} + +/* + * linux/lib/string.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * stupid library routines.. The optimized versions should generally be found + * as inline code in <asm-xx/string.h> + * + * These are buggy as well.. 
+ */ + +#include <linux/types.h> +#include <linux/string.h> + +char *___strtok = NULL; + +#ifndef __HAVE_ARCH_STRSPN +size_t +strspn (const char *s, const char *accept) +{ + const char *p; + const char *a; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) + { + for (a = accept; *a != '\0'; ++a) + { + if (*p == *a) + break; + } + if (*a == '\0') + return count; + ++count; + } + + return count; +} +#endif + +#ifndef __HAVE_ARCH_STRPBRK +char * +strpbrk (const char *cs, const char *ct) +{ + const char *sc1, *sc2; + + for (sc1 = cs; *sc1 != '\0'; ++sc1) + { + for (sc2 = ct; *sc2 != '\0'; ++sc2) + { + if (*sc1 == *sc2) + return (char *) sc1; + } + } + return NULL; +} +#endif + +#ifndef __HAVE_ARCH_STRTOK +char * +strtok (char *s, const char *ct) +{ + char *sbegin, *send; + + sbegin = s ? s : ___strtok; + if (!sbegin) + { + return NULL; + } + sbegin += strspn (sbegin, ct); + if (*sbegin == '\0') + { + ___strtok = NULL; + return (NULL); + } + send = strpbrk (sbegin, ct); + if (send && *send != '\0') + *send++ = '\0'; + ___strtok = send; + return (sbegin); +} +#endif + + +#ifndef __HAVE_ARCH_STRSTR +char * +strstr (const char *s1, const char *s2) +{ + int l1, l2; + + l2 = strlen (s2); + if (! l2) + return (char *) s1; + l1 = strlen (s1); + while (l1 >= l2) + { + l1--; + if (! 
memcmp (s1,s2,l2)) + return (char *) s1; + s1++; + } + return NULL; +} +#endif + +struct proc_dir_entry proc_scsi; +struct inode_operations proc_scsi_inode_operations; +struct proc_dir_entry proc_net; +struct inode_operations proc_net_inode_operations; + +int +proc_register (struct proc_dir_entry *xxx1, struct proc_dir_entry *xxx2) +{ + return 0; +} + +int +proc_unregister (struct proc_dir_entry *xxx1, int xxx2) +{ + return 0; +} + +void +add_blkdev_randomness (int major) +{ +} + +void +do_gettimeofday (struct timeval *tv) +{ + host_get_time (1, tv); +} + +int +dev_get_info (char *buffer, char **start, off_t offset, int length, int dummy) +{ + return 0; +} diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c new file mode 100644 index 0000000..93556db --- /dev/null +++ b/linux/dev/glue/net.c @@ -0,0 +1,530 @@ +/* + * Linux network driver support. + * + * Copyright (C) 1996 The University of Utah and the Computer Systems + * Laboratory at the University of Utah (CSL) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Shantanu Goel, University of Utah CSL + */ + +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Ethernet-type device handling. 
+ * + * Version: @(#)eth.c 1.0.7 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * + * Fixes: + * Mr Linux : Arp problems + * Alan Cox : Generic queue tidyup (very tiny here) + * Alan Cox : eth_header ntohs should be htons + * Alan Cox : eth_rebuild_header missing an htons and + * minor other things. + * Tegge : Arp bug fixes. + * Florian : Removed many unnecessary functions, code cleanup + * and changes for new arp and skbuff. + * Alan Cox : Redid header building to reflect new format. + * Alan Cox : ARP only when compiled with CONFIG_INET + * Greg Page : 802.2 and SNAP stuff. + * Alan Cox : MAC layer pointers/new format. + * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding. + * Alan Cox : Protect against forwarding explosions with + * older network drivers and IFF_ALLMULTI + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <sys/types.h> +#include <machine/spl.h> + +#include <mach/mach_types.h> +#include <mach/kern_return.h> +#include <mach/mig_errors.h> +#include <mach/port.h> +#include <mach/vm_param.h> +#include <mach/notify.h> + +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> + +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <device/device_types.h> +#include <device/device_port.h> +#include <device/if_hdr.h> +#include <device/if_ether.h> +#include <device/if_hdr.h> +#include <device/net_io.h> +#include "device_reply.h" + +#include <linux_emul.h> + +#define MACH_INCLUDE +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/malloc.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +extern int linux_intr_pri; + +/* One of these is associated with each instance of a device. */ +struct net_data +{ + ipc_port_t port; /* device port */ + struct ifnet ifnet; /* Mach ifnet structure (needed for filters) */ + struct device device; /* generic device structure */ + struct linux_device *dev; /* Linux network device structure */ +}; + +/* List of sk_buffs waiting to be freed. */ +static struct sk_buff_head skb_done_list; + +/* Forward declarations. */ + +extern struct device_emulation_ops linux_net_emulation_ops; + +static int print_packet_size = 0; + +/* Linux kernel network support routines. */ + +/* Requeue packet SKB for transmission after the interface DEV + has timed out. The priority of the packet is PRI. + In Mach, we simply drop the packet like the native drivers. */ +void +dev_queue_xmit (struct sk_buff *skb, struct linux_device *dev, int pri) +{ + dev_kfree_skb (skb, FREE_WRITE); +} + +/* Close the device DEV. */ +int +dev_close (struct linux_device *dev) +{ + return 0; +} + +/* Network software interrupt handler. 
*/ +void +net_bh (void) +{ + int len; + struct sk_buff *skb; + struct linux_device *dev; + + /* Start transmission on interfaces. */ + for (dev = dev_base; dev; dev = dev->next) + { + if (dev->base_addr && dev->base_addr != 0xffe0) + while (1) + { + skb = skb_dequeue (&dev->buffs[0]); + if (skb) + { + len = skb->len; + if ((*dev->hard_start_xmit) (skb, dev)) + { + skb_queue_head (&dev->buffs[0], skb); + mark_bh (NET_BH); + break; + } + else if (print_packet_size) + printf ("net_bh: length %d\n", len); + } + else + break; + } + } +} + +/* Free all sk_buffs on the done list. + This routine is called by the iodone thread in ds_routines.c. */ +void +free_skbuffs () +{ + struct sk_buff *skb; + + while (1) + { + skb = skb_dequeue (&skb_done_list); + if (skb) + { + if (skb->copy) + { + vm_map_copy_discard (skb->copy); + skb->copy = NULL; + } + if (IP_VALID (skb->reply)) + { + ds_device_write_reply (skb->reply, skb->reply_type, 0, skb->len); + skb->reply = IP_NULL; + } + dev_kfree_skb (skb, FREE_WRITE); + } + else + break; + } +} + +/* Allocate an sk_buff with SIZE bytes of data space. */ +struct sk_buff * +alloc_skb (unsigned int size, int priority) +{ + return dev_alloc_skb (size); +} + +/* Free SKB. */ +void +kfree_skb (struct sk_buff *skb, int priority) +{ + dev_kfree_skb (skb, priority); +} + +/* Allocate an sk_buff with SIZE bytes of data space. */ +struct sk_buff * +dev_alloc_skb (unsigned int size) +{ + struct sk_buff *skb; + unsigned char *bptr; + int len = size; + + size = (size + 15) & ~15; + size += sizeof (struct sk_buff); + + bptr = linux_kmalloc (size, GFP_KERNEL); + if (bptr == NULL) + return NULL; + + /* XXX: In Mach, a sk_buff is located at the head, + while it's located at the tail in Linux. 
*/ + skb = bptr; + skb->dev = NULL; + skb->reply = IP_NULL; + skb->copy = NULL; + skb->len = 0; + skb->prev = skb->next = NULL; + skb->list = NULL; + skb->data = bptr + sizeof (struct sk_buff); + skb->tail = skb->data; + skb->head = skb->data; + skb->end = skb->data + len; + + return skb; +} + +/* Free the sk_buff SKB. */ +void +dev_kfree_skb (struct sk_buff *skb, int mode) +{ + unsigned flags; + extern void *io_done_list; + + /* Queue sk_buff on done list if there is a + page list attached or we need to send a reply. + Wakeup the iodone thread to process the list. */ + if (skb->copy || IP_VALID (skb->reply)) + { + skb_queue_tail (&skb_done_list, skb); + save_flags (flags); + thread_wakeup ((event_t) & io_done_list); + restore_flags (flags); + return; + } + linux_kfree (skb); +} + +/* Accept packet SKB received on an interface. */ +void +netif_rx (struct sk_buff *skb) +{ + ipc_kmsg_t kmsg; + struct ether_header *eh; + struct packet_header *ph; + struct linux_device *dev = skb->dev; + + assert (skb != NULL); + + if (print_packet_size) + printf ("netif_rx: length %ld\n", skb->len); + + /* Allocate a kernel message buffer. */ + kmsg = net_kmsg_get (); + if (!kmsg) + { + dev_kfree_skb (skb, FREE_READ); + return; + } + + /* Copy packet into message buffer. */ + eh = (struct ether_header *) (net_kmsg (kmsg)->header); + ph = (struct packet_header *) (net_kmsg (kmsg)->packet); + memcpy (eh, skb->data, sizeof (struct ether_header)); + memcpy (ph + 1, skb->data + sizeof (struct ether_header), + skb->len - sizeof (struct ether_header)); + ph->type = eh->ether_type; + ph->length = (skb->len - sizeof (struct ether_header) + + sizeof (struct packet_header)); + + dev_kfree_skb (skb, FREE_READ); + + /* Pass packet up to the microkernel. */ + net_packet (&dev->net_data->ifnet, kmsg, + ph->length, ethernet_priority (kmsg)); +} + +/* Mach device interface routines. */ + +/* Return a send right associated with network device ND. 
*/ +static ipc_port_t +dev_to_port (void *nd) +{ + return (nd + ? ipc_port_make_send (((struct net_data *) nd)->port) + : IP_NULL); +} + +static io_return_t +device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, + dev_mode_t mode, char *name, device_t *devp) +{ + io_return_t err = D_SUCCESS; + ipc_port_t notify; + struct ifnet *ifp; + struct linux_device *dev; + struct net_data *nd; + + /* Search for the device. */ + for (dev = dev_base; dev; dev = dev->next) + if (dev->base_addr + && dev->base_addr != 0xffe0 + && !strcmp (name, dev->name)) + break; + if (!dev) + return D_NO_SUCH_DEVICE; + + /* Allocate and initialize device data if this is the first open. */ + nd = dev->net_data; + if (!nd) + { + dev->net_data = nd = ((struct net_data *) + kalloc (sizeof (struct net_data))); + if (!nd) + { + err = D_NO_MEMORY; + goto out; + } + nd->dev = dev; + nd->device.emul_data = nd; + nd->device.emul_ops = &linux_net_emulation_ops; + nd->port = ipc_port_alloc_kernel (); + if (nd->port == IP_NULL) + { + err = KERN_RESOURCE_SHORTAGE; + goto out; + } + ipc_kobject_set (nd->port, (ipc_kobject_t) & nd->device, IKOT_DEVICE); + notify = ipc_port_make_sonce (nd->port); + ip_lock (nd->port); + ipc_port_nsrequest (nd->port, 1, notify, ¬ify); + assert (notify == IP_NULL); + + ifp = &nd->ifnet; + ifp->if_unit = dev->name[strlen (dev->name) - 1] - '0'; + ifp->if_flags = IFF_UP | IFF_RUNNING; + ifp->if_mtu = dev->mtu; + ifp->if_header_size = dev->hard_header_len; + ifp->if_header_format = dev->type; + ifp->if_address_size = dev->addr_len; + ifp->if_address = dev->dev_addr; + if_init_queues (ifp); + + if (dev->open) + { + linux_intr_pri = SPL6; + if ((*dev->open) (dev)) + err = D_NO_SUCH_DEVICE; + } + + out: + if (err) + { + if (nd) + { + if (nd->port != IP_NULL) + { + ipc_kobject_set (nd->port, IKO_NULL, IKOT_NONE); + ipc_port_dealloc_kernel (nd->port); + } + kfree ((vm_offset_t) nd, sizeof (struct net_data)); + nd = NULL; + dev->net_data = NULL; + } + } + else + { + 
dev->flags |= LINUX_IFF_UP | LINUX_IFF_RUNNING; + skb_queue_head_init (&dev->buffs[0]); + } + if (IP_VALID (reply_port)) + ds_device_open_reply (reply_port, reply_port_type, + err, dev_to_port (nd)); + return MIG_NO_REPLY; + } + + *devp = &nd->device; + return D_SUCCESS; +} + +static io_return_t +device_write (void *d, ipc_port_t reply_port, + mach_msg_type_name_t reply_port_type, dev_mode_t mode, + recnum_t bn, io_buf_ptr_t data, unsigned int count, + int *bytes_written) +{ + unsigned char *p; + int i, amt, skblen, s; + io_return_t err = 0; + vm_map_copy_t copy = (vm_map_copy_t) data; + struct net_data *nd = d; + struct linux_device *dev = nd->dev; + struct sk_buff *skb; + + if (count == 0 || count > dev->mtu + dev->hard_header_len) + return D_INVALID_SIZE; + + /* Allocate a sk_buff. */ + amt = PAGE_SIZE - (copy->offset & PAGE_MASK); + skblen = (amt >= count) ? 0 : count; + skb = dev_alloc_skb (skblen); + if (!skb) + return D_NO_MEMORY; + + /* Copy user data. This is only required if it spans multiple pages. */ + if (skblen == 0) + { + assert (copy->cpy_npages == 1); + + skb->copy = copy; + skb->data = ((void *) copy->cpy_page_list[0]->phys_addr + + (copy->offset & PAGE_MASK)); + skb->len = count; + skb->head = skb->data; + skb->tail = skb->data + skb->len; + skb->end = skb->tail; + } + else + { + skb->len = skblen; + skb->tail = skb->data + skblen; + skb->end = skb->tail; + + memcpy (skb->data, + ((void *) copy->cpy_page_list[0]->phys_addr + + (copy->offset & PAGE_MASK)), + amt); + count -= amt; + p = skb->data + amt; + for (i = 1; count > 0 && i < copy->cpy_npages; i++) + { + amt = PAGE_SIZE; + if (amt > count) + amt = count; + memcpy (p, (void *) copy->cpy_page_list[i]->phys_addr, amt); + count -= amt; + p += amt; + } + + assert (count == 0); + + vm_map_copy_discard (copy); + } + + skb->dev = dev; + skb->reply = reply_port; + skb->reply_type = reply_port_type; + + /* Queue packet for transmission and schedule a software interrupt. 
*/ + s = splimp (); + if (dev->buffs[0].next != (struct sk_buff *) &dev->buffs[0] + || (*dev->hard_start_xmit) (skb, dev)) + { + __skb_queue_tail (&dev->buffs[0], skb); + mark_bh (NET_BH); + } + splx (s); + + return MIG_NO_REPLY; +} + +static io_return_t +device_get_status (void *d, dev_flavor_t flavor, dev_status_t status, + mach_msg_type_number_t *count) +{ + return net_getstat (&((struct net_data *) d)->ifnet, flavor, status, count); +} + +static io_return_t +device_set_filter (void *d, ipc_port_t port, int priority, + filter_t * filter, unsigned filter_count) +{ + return net_set_filter (&((struct net_data *) d)->ifnet, + port, priority, filter, filter_count); +} + +struct device_emulation_ops linux_net_emulation_ops = +{ + NULL, + NULL, + dev_to_port, + device_open, + NULL, + device_write, + NULL, + NULL, + NULL, + NULL, + device_get_status, + device_set_filter, + NULL, + NULL, + NULL, + NULL +}; + +/* Do any initialization required for network devices. */ +void +linux_net_emulation_init () +{ + skb_queue_head_init (&skb_done_list); +} |