/* * Linux block driver support. * * Copyright (C) 1996 The University of Utah and the Computer Systems * Laboratory at the University of Utah (CSL) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * * Author: Shantanu Goel, University of Utah CSL */ /* * linux/drivers/block/ll_rw_blk.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics */ /* * linux/fs/block_dev.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * linux/fs/buffer.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "device_reply.h" #include #define MACH_INCLUDE #include #include #include #include #include #include #include #include #include #include #include extern int linux_auto_config; extern int linux_intr_pri; extern int linux_to_mach_error (int); /* This task queue is not used in Mach: just for fixing undefined symbols. */ DECLARE_TASK_QUEUE (tq_disk); /* Location of VTOC in units for sectors (512 bytes). */ #define PDLOCATION 29 /* Linux kernel variables. */ /* Temporary data allocated on the stack. */ struct temp_data { struct inode inode; struct file file; struct request req; queue_head_t pages; }; /* One of these exists for each driver associated with a major number. 
*/
struct device_struct
{
  const char *name;		/* device name */
  struct file_operations *fops;	/* operations vector */
  /* NOTE(review): BUSY and WANT are one-bit bit-fields of plain `int';
     whether such a field is signed is implementation-defined, so code
     should only test them for zero/non-zero.  */
  int busy:1;			/* driver is being opened/closed */
  int want:1;			/* someone wants to open/close driver */
  struct gendisk *gd;		/* DOS partition information */
  int default_slice;		/* what slice to use when none is given */
  struct disklabel **labels;	/* disklabels for each DOS partition */
};

/* An entry in the Mach name to Linux major number conversion table.  */
struct name_map
{
  const char *name;	/* Mach name for device */
  unsigned major;	/* Linux major number */
  unsigned unit;	/* Linux unit number */
  int read_only;	/* 1 if device is read only */
};

/* Driver operation table, indexed by Linux major number.  */
static struct device_struct blkdevs[MAX_BLKDEV];

/* Driver request function table, indexed by Linux major number.
   Only the first 23 entries are spelled out; the remaining ones are
   zero-initialized by the language rules for static storage.  */
struct blk_dev_struct blk_dev[MAX_BLKDEV] =
{
  { NULL, NULL },		/* 0 no_dev */
  { NULL, NULL },		/* 1 dev mem */
  { NULL, NULL },		/* 2 dev fd */
  { NULL, NULL },		/* 3 dev ide0 or hd */
  { NULL, NULL },		/* 4 dev ttyx */
  { NULL, NULL },		/* 5 dev tty */
  { NULL, NULL },		/* 6 dev lp */
  { NULL, NULL },		/* 7 dev pipes */
  { NULL, NULL },		/* 8 dev sd */
  { NULL, NULL },		/* 9 dev st */
  { NULL, NULL },		/* 10 */
  { NULL, NULL },		/* 11 */
  { NULL, NULL },		/* 12 */
  { NULL, NULL },		/* 13 */
  { NULL, NULL },		/* 14 */
  { NULL, NULL },		/* 15 */
  { NULL, NULL },		/* 16 */
  { NULL, NULL },		/* 17 */
  { NULL, NULL },		/* 18 */
  { NULL, NULL },		/* 19 */
  { NULL, NULL },		/* 20 */
  { NULL, NULL },		/* 21 */
  { NULL, NULL }		/* 22 dev ide1 */
};

/*
 * blk_size contains the size of all block-devices in units of 1024 byte
 * sectors:
 *
 * blk_size[MAJOR][MINOR]
 *
 * if (!blk_size[MAJOR]) then no minor size checking is done.
 */
int *blk_size[MAX_BLKDEV] = { NULL, NULL, };

/*
 * blksize_size contains the size of all block-devices:
 *
 * blksize_size[MAJOR][MINOR]
 *
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 */
int *blksize_size[MAX_BLKDEV] = { NULL, NULL, };

/*
 * hardsect_size contains the size of the hardware sector of a device.
* * hardsect_size[MAJOR][MINOR] * * if (!hardsect_size[MAJOR]) * then 512 bytes is assumed. * else * sector_size is hardsect_size[MAJOR][MINOR] * This is currently set by some scsi device and read by the msdos fs driver * This might be a some uses later. */ int *hardsect_size[MAX_BLKDEV] = { NULL, NULL, }; /* This specifies how many sectors to read ahead on the disk. This is unused in Mach. It is here to make drivers compile. */ int read_ahead[MAX_BLKDEV] = {0, }; /* Use to wait on when there are no free requests. This is unused in Mach. It is here to make drivers compile. */ struct wait_queue *wait_for_request = NULL; /* Map for allocating device memory. */ extern vm_map_t device_io_map; /* Initialize block drivers. */ int blk_dev_init () { #ifdef CONFIG_BLK_DEV_IDE ide_init (); #endif #ifdef CONFIG_BLK_DEV_FD floppy_init (); #else outb_p (0xc, 0x3f2); #endif return 0; } /* Return 1 if major number MAJOR corresponds to a disk device. */ static inline int disk_major (int major) { return (major == IDE0_MAJOR || major == IDE1_MAJOR || major == IDE2_MAJOR || major == IDE3_MAJOR || major == SCSI_DISK_MAJOR); } /* Linux kernel block support routines. */ /* Register a driver for major number MAJOR, with name NAME, and operations vector FOPS. */ int register_blkdev (unsigned major, const char *name, struct file_operations *fops) { int err = 0; if (major == 0) { for (major = MAX_BLKDEV - 1; major > 0; major--) if (blkdevs[major].fops == NULL) goto out; return -LINUX_EBUSY; } if (major >= MAX_BLKDEV) return -LINUX_EINVAL; if (blkdevs[major].fops && blkdevs[major].fops != fops) return -LINUX_EBUSY; out: blkdevs[major].name = name; blkdevs[major].fops = fops; blkdevs[major].busy = 0; blkdevs[major].want = 0; blkdevs[major].gd = NULL; blkdevs[major].default_slice = 0; blkdevs[major].labels = NULL; return 0; } /* Unregister the driver associated with major number MAJOR and having the name NAME. 
*/ int unregister_blkdev (unsigned major, const char *name) { int err; if (major >= MAX_BLKDEV) return -LINUX_EINVAL; if (! blkdevs[major].fops || strcmp (blkdevs[major].name, name)) return -LINUX_EINVAL; blkdevs[major].fops = NULL; if (blkdevs[major].labels) { assert (blkdevs[major].gd); kfree ((vm_offset_t) blkdevs[major].labels, (sizeof (struct disklabel *) * blkdevs[major].gd->max_p * blkdevs[major].gd->max_nr)); } return 0; } void set_blocksize (kdev_t dev, int size) { extern int *blksize_size[]; if (! blksize_size[MAJOR (dev)]) return; switch (size) { case 512: case 1024: case 2048: case 4096: break; default: panic ("Invalid blocksize passed to set_blocksize"); break; } blksize_size[MAJOR (dev)][MINOR (dev)] = size; } /* Allocate a buffer SIZE bytes long. */ static void * alloc_buffer (int size) { vm_page_t m; struct temp_data *d; assert (size <= PAGE_SIZE); if (! linux_auto_config) { while ((m = vm_page_grab (FALSE)) == 0) VM_PAGE_WAIT (0); d = current_thread ()->pcb->data; assert (d); queue_enter (&d->pages, m, vm_page_t, pageq); return (void *) m->phys_addr; } return (void *) __get_free_pages (GFP_KERNEL, 0, ~0UL); } /* Free buffer P which is SIZE bytes long. */ static void free_buffer (void *p, int size) { int i; struct temp_data *d; vm_page_t m; assert (size <= PAGE_SIZE); if (! linux_auto_config) { d = current_thread ()->pcb->data; assert (d); queue_iterate (&d->pages, m, vm_page_t, pageq) { if (m->phys_addr == (vm_offset_t) p) { queue_remove (&d->pages, m, vm_page_t, pageq); vm_page_lock_queues (); vm_page_free (m); vm_page_lock_queues (); return; } } panic ("free_buffer"); } free_pages ((unsigned long) p, 0); } /* Allocate a buffer of SIZE bytes and associate it with block number BLOCK of device DEV. 
*/ struct buffer_head * getblk (kdev_t dev, int block, int size) { struct buffer_head *bh; assert (size <= PAGE_SIZE); bh = (struct buffer_head *) kalloc (sizeof (struct buffer_head)); if (bh) { memset (bh, 0, sizeof (struct buffer_head)); bh->b_data = alloc_buffer (size); if (! bh->b_data) { kfree ((vm_offset_t) bh, sizeof (struct buffer_head)); return NULL; } bh->b_dev = dev; bh->b_size = size; bh->b_state = 1 << BH_Lock; bh->b_blocknr = block; } return bh; } /* Release buffer BH previously allocated by getblk. */ void __brelse (struct buffer_head *bh) { free_buffer (bh->b_data, bh->b_size); kfree ((vm_offset_t) bh, sizeof (*bh)); } /* Allocate a buffer of SIZE bytes and fill it with data from device DEV starting at block number BLOCK. */ struct buffer_head * bread (kdev_t dev, int block, int size) { int err; struct buffer_head *bh; bh = getblk (dev, block, size); if (bh) { ll_rw_block (READ, 1, &bh); wait_on_buffer (bh); if (! buffer_uptodate (bh)) { __brelse (bh); return NULL; } } return bh; } /* Return the block size for device DEV in *BSIZE and log2(block size) in *BSHIFT. */ static void get_block_size (kdev_t dev, int *bsize, int *bshift) { int i; *bsize = BLOCK_SIZE; if (blksize_size[MAJOR (dev)] && blksize_size[MAJOR (dev)][MINOR (dev)]) *bsize = blksize_size[MAJOR (dev)][MINOR (dev)]; for (i = *bsize, *bshift = 0; i != 1; i >>= 1, (*bshift)++) ; } /* Enqueue request REQ on a driver's queue. */ static inline void enqueue_request (struct request *req) { struct request *tmp; struct blk_dev_struct *dev; dev = blk_dev + MAJOR (req->rq_dev); cli (); tmp = dev->current_request; if (! tmp) { dev->current_request = req; (*dev->request_fn) (); sti (); return; } while (tmp->next) { if ((IN_ORDER (tmp, req) || ! 
IN_ORDER (tmp, tmp->next)) && IN_ORDER (req, tmp->next)) break; tmp = tmp->next; } req->next = tmp->next; tmp->next = req; if (scsi_blk_major (MAJOR (req->rq_dev))) (*dev->request_fn) (); sti (); } /* Perform the I/O operation RW on the buffer list BH containing NR buffers. */ void ll_rw_block (int rw, int nr, struct buffer_head **bh) { int i, bshift, bsize; unsigned major; struct request *r; static struct request req; major = MAJOR (bh[0]->b_dev); assert (major < MAX_BLKDEV); get_block_size (bh[0]->b_dev, &bsize, &bshift); if (! linux_auto_config) { assert (current_thread ()->pcb->data); r = &((struct temp_data *) current_thread ()->pcb->data)->req; } else r = &req; for (i = 0, r->nr_sectors = 0; i < nr - 1; i++) { r->nr_sectors += bh[i]->b_size >> 9; bh[i]->b_reqnext = bh[i + 1]; } r->nr_sectors += bh[i]->b_size >> 9; bh[i]->b_reqnext = NULL; r->rq_status = RQ_ACTIVE; r->rq_dev = bh[0]->b_dev; r->cmd = rw; r->errors = 0; r->sector = bh[0]->b_blocknr << (bshift - 9); r->current_nr_sectors = bh[0]->b_size >> 9; r->buffer = bh[0]->b_data; r->bh = bh[0]; r->bhtail = bh[nr - 1]; r->sem = NULL; r->next = NULL; enqueue_request (r); } #define BSIZE (1 << bshift) #define BMASK (BSIZE - 1) /* Perform read/write operation RW on device DEV starting at *off to/from buffer *BUF of size *RESID. The device block size is given by BSHIFT. *OFF and *RESID may be non-multiples of the block size. *OFF, *BUF and *RESID are updated if the operation completed successfully. */ static int rdwr_partial (int rw, kdev_t dev, loff_t *off, char **buf, int *resid, int bshift) { int c, err = 0, o; long sect, nsect; struct buffer_head bhead, *bh = &bhead; struct gendisk *gd; memset (bh, 0, sizeof (struct buffer_head)); bh->b_state = 1 << BH_Lock; bh->b_dev = dev; bh->b_blocknr = *off >> bshift; bh->b_size = BSIZE; /* Check if this device has non even number of blocks. 
*/ for (gd = gendisk_head, nsect = -1; gd; gd = gd->next) if (gd->major == MAJOR (dev)) { nsect = gd->part[MINOR (dev)].nr_sects; break; } if (nsect > 0) { sect = bh->b_blocknr << (bshift - 9); assert ((nsect - sect) > 0); if (nsect - sect < (BSIZE >> 9)) bh->b_size = (nsect - sect) << 9; } bh->b_data = alloc_buffer (bh->b_size); if (! bh->b_data) return -LINUX_ENOMEM; ll_rw_block (READ, 1, &bh); wait_on_buffer (bh); if (buffer_uptodate (bh)) { o = *off & BMASK; c = bh->b_size - o; assert (*resid <= c); if (c > *resid) c = *resid; if (rw == READ) memcpy (*buf, bh->b_data + o, c); else { memcpy (bh->b_data + o, *buf, c); bh->b_state = (1 << BH_Dirty) | (1 << BH_Lock); ll_rw_block (WRITE, 1, &bh); wait_on_buffer (bh); if (! buffer_uptodate (bh)) { err = -LINUX_EIO; goto out; } } *buf += c; *resid -= c; *off += c; } else err = -LINUX_EIO; out: free_buffer (bh->b_data, bh->b_size); return err; } #define BH_Bounce 16 #define MAX_BUF VM_MAP_COPY_PAGE_LIST_MAX /* Perform read/write operation RW on device DEV starting at *off to/from buffer *BUF of size *RESID. The device block size is given by BSHIFT. *OFF and *RESID must be multiples of the block size. *OFF, *BUF and *RESID are updated if the operation completed successfully. */ static int rdwr_full (int rw, kdev_t dev, loff_t *off, char **buf, int *resid, int bshift) { int cc, err = 0, i, j, nb, nbuf; long blk; struct buffer_head bhead[MAX_BUF], *bh, *bhp[MAX_BUF]; assert ((*off & BMASK) == 0); nbuf = *resid >> bshift; blk = *off >> bshift; for (i = nb = 0, bh = bhead; nb < nbuf; bh++) { memset (bh, 0, sizeof (*bh)); bh->b_dev = dev; bh->b_blocknr = blk; set_bit (BH_Lock, &bh->b_state); if (rw == WRITE) set_bit (BH_Dirty, &bh->b_state); cc = PAGE_SIZE - (((int) *buf) & PAGE_MASK); if (cc >= BSIZE && ((int) *buf & 511) == 0) cc &= ~BMASK; else { cc = PAGE_SIZE; set_bit (BH_Bounce, &bh->b_state); } if (cc > ((nbuf - nb) << bshift)) cc = (nbuf - nb) << bshift; if (! 
test_bit (BH_Bounce, &bh->b_state)) bh->b_data = (char *) pmap_extract (vm_map_pmap (device_io_map), (((vm_offset_t) *buf) + (nb << bshift))); else { bh->b_data = alloc_buffer (cc); if (! bh->b_data) { err = -LINUX_ENOMEM; break; } if (rw == WRITE) memcpy (bh->b_data, *buf + (nb << bshift), cc); } bh->b_size = cc; bhp[i] = bh; nb += cc >> bshift; blk += nb; if (++i == MAX_BUF) break; } if (! err) { ll_rw_block (rw, i, bhp); wait_on_buffer (bhp[i - 1]); } for (bh = bhead, cc = 0, j = 0; j < i; cc += bh->b_size, bh++, j++) { if (! err && buffer_uptodate (bh) && rw == READ && test_bit (BH_Bounce, &bh->b_state)) memcpy (*buf + cc, bh->b_data, bh->b_size); else if (! err && ! buffer_uptodate (bh)) err = -LINUX_EIO; if (test_bit (BH_Bounce, &bh->b_state)) free_buffer (bh->b_data, bh->b_size); } if (! err) { *buf += cc; *resid -= cc; *off += cc; } return err; } /* Perform read/write operation RW on device DEV starting at *off to/from buffer BUF of size COUNT. *OFF is updated if the operation completed successfully. */ static int do_rdwr (int rw, kdev_t dev, loff_t *off, char *buf, int count) { int bsize, bshift, err = 0, resid = count; get_block_size (dev, &bsize, &bshift); if (*off & BMASK) err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); while (resid >= bsize && ! err) err = rdwr_full (rw, dev, off, &buf, &resid, bshift); if (! err && resid) err = rdwr_partial (rw, dev, off, &buf, &resid, bshift); return err ? err : count - resid; } int block_write (struct inode *inode, struct file *filp, const char *buf, int count) { return do_rdwr (WRITE, inode->i_rdev, &filp->f_pos, (char *) buf, count); } int block_read (struct inode *inode, struct file *filp, char *buf, int count) { return do_rdwr (READ, inode->i_rdev, &filp->f_pos, buf, count); } /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This * is a relatively slow routine, so we have to try to minimize using * it. 
Thus it is called only upon a 'mount' or 'open'. This * is the best way of combining speed and utility, I think. * People changing diskettes in the middle of an operation deserve * to loose :-) */ int check_disk_change (kdev_t dev) { unsigned i; struct file_operations * fops; i = MAJOR(dev); if (i >= MAX_BLKDEV || (fops = blkdevs[i].fops) == NULL) return 0; if (fops->check_media_change == NULL) return 0; if (! (*fops->check_media_change) (dev)) return 0; /* printf ("Disk change detected on device %s\n", kdevname(dev));*/ if (fops->revalidate) (*fops->revalidate) (dev); return 1; } /* Mach device interface routines. */ /* Mach name to Linux major/minor number mapping table. */ static struct name_map name_to_major[] = { /* IDE disks */ { "hd0", IDE0_MAJOR, 0, 0 }, { "hd1", IDE0_MAJOR, 1, 0 }, { "hd2", IDE1_MAJOR, 0, 0 }, { "hd3", IDE1_MAJOR, 1, 0 }, { "hd4", IDE2_MAJOR, 0, 0 }, { "hd5", IDE2_MAJOR, 1, 0 }, { "hd6", IDE3_MAJOR, 0, 0 }, { "hd7", IDE3_MAJOR, 1, 0 }, /* IDE CDROMs */ { "wcd0", IDE0_MAJOR, 0, 1 }, { "wcd1", IDE0_MAJOR, 1, 1 }, { "wcd2", IDE1_MAJOR, 0, 1 }, { "wcd3", IDE1_MAJOR, 1, 1 }, { "wcd4", IDE2_MAJOR, 0, 1 }, { "wcd5", IDE2_MAJOR, 1, 1 }, { "wcd6", IDE3_MAJOR, 0, 1 }, { "wcd7", IDE3_MAJOR, 1, 1 }, /* SCSI disks */ { "sd0", SCSI_DISK_MAJOR, 0, 0 }, { "sd1", SCSI_DISK_MAJOR, 1, 0 }, { "sd2", SCSI_DISK_MAJOR, 2, 0 }, { "sd3", SCSI_DISK_MAJOR, 3, 0 }, { "sd4", SCSI_DISK_MAJOR, 4, 0 }, { "sd5", SCSI_DISK_MAJOR, 5, 0 }, { "sd6", SCSI_DISK_MAJOR, 6, 0 }, { "sd7", SCSI_DISK_MAJOR, 7, 0 }, /* SCSI CDROMs */ { "cd0", SCSI_CDROM_MAJOR, 0, 1 }, { "cd1", SCSI_CDROM_MAJOR, 1, 1 }, /* Floppy disks */ { "fd0", FLOPPY_MAJOR, 0, 0 }, { "fd1", FLOPPY_MAJOR, 1, 0 }, }; #define NUM_NAMES (sizeof (name_to_major) / sizeof (name_to_major[0])) /* One of these is associated with each open instance of a device. 
*/ struct block_data { const char *name; /* Mach name for device */ int want:1; /* someone is waiting for I/O to complete */ int open_count; /* number of opens */ int iocount; /* number of pending I/O operations */ int part; /* BSD partition number (-1 if none) */ int flags; /* Linux file flags */ int mode; /* Linux file mode */ kdev_t dev; /* Linux device number */ ipc_port_t port; /* port representing device */ struct device_struct *ds; /* driver operation table entry */ struct device device; /* generic device header */ struct name_map *np; /* name to inode map */ struct block_data *next; /* forward link */ }; /* List of open devices. */ static struct block_data *open_list; /* Forward declarations. */ extern struct device_emulation_ops linux_block_emulation_ops; static io_return_t device_close (void *); /* Return a send right for block device BD. */ static ipc_port_t dev_to_port (void *bd) { return (bd ? ipc_port_make_send (((struct block_data *) bd)->port) : IP_NULL); } /* Return 1 if C is a letter of the alphabet. */ static inline int isalpha (int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } /* Return 1 if C is a digit. */ static inline int isdigit (int c) { return c >= '0' && c <= '9'; } /* Find the name map entry for device NAME. Set *SLICE to be the DOS partition and *PART the BSD/Mach partition, if any. */ static struct name_map * find_name (char *name, int *slice, int *part) { char *p, *q; int i, len; struct name_map *np; /* Parse name into name, unit, DOS partition (slice) and partition. */ for (*slice = 0, *part = -1, p = name; isalpha (*p); p++) ; if (p == name || ! isdigit (*p)) return NULL; do p++; while (isdigit (*p)); if (*p) { q = p; if (*q == 's' && isdigit (*(q + 1))) { q++; do *slice = *slice * 10 + *q++ - '0'; while (isdigit (*q)); if (! *q) goto find_major; } if (! isalpha (*q) || *(q + 1)) return NULL; *part = *q - 'a'; } find_major: /* Convert name to major number. 
*/ for (i = 0, np = name_to_major; i < NUM_NAMES; i++, np++) { len = strlen (np->name); if (len == (p - name) && ! strncmp (np->name, name, len)) return np; } return NULL; } /* Attempt to read a BSD disklabel from device DEV. */ static struct disklabel * read_bsd_label (kdev_t dev) { int bsize, bshift; struct buffer_head *bh; struct disklabel *dlp, *lp = NULL; get_block_size (dev, &bsize, &bshift); bh = bread (dev, LBLLOC >> (bshift - 9), bsize); if (bh) { dlp = (struct disklabel *) (bh->b_data + ((LBLLOC << 9) & (bsize - 1))); if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC) { lp = (struct disklabel *) kalloc (sizeof (*lp)); assert (lp); memcpy (lp, dlp, sizeof (*lp)); } __brelse (bh); } return lp; } /* Attempt to read a VTOC from device DEV. */ static struct disklabel * read_vtoc (kdev_t dev) { int bshift, bsize, i; struct buffer_head *bh; struct evtoc *evp; struct disklabel *lp = NULL; get_block_size (dev, &bsize, &bshift); bh = bread (dev, PDLOCATION >> (bshift - 9), bsize); if (bh) { evp = (struct evtoc *) (bh->b_data + ((PDLOCATION << 9) & (bsize - 1))); if (evp->sanity == VTOC_SANE) { lp = (struct disklabel *) kalloc (sizeof (*lp)); assert (lp); lp->d_npartitions = evp->nparts; if (lp->d_npartitions > MAXPARTITIONS) lp->d_npartitions = MAXPARTITIONS; for (i = 0; i < lp->d_npartitions; i++) { lp->d_partitions[i].p_size = evp->part[i].p_size; lp->d_partitions[i].p_offset = evp->part[i].p_start; lp->d_partitions[i].p_fstype = FS_BSDFFS; } } __brelse (bh); } return lp; } /* Initialize BSD/Mach partition table for device specified by NP, DS and *DEV. Check SLICE and *PART for validity. */ static kern_return_t init_partition (struct name_map *np, kdev_t *dev, struct device_struct *ds, int slice, int *part) { int err, i, j; struct disklabel *lp; struct gendisk *gd = ds->gd; struct partition *p; struct temp_data *d = current_thread ()->pcb->data; if (! 
gd) { *part = -1; return 0; } if (ds->labels) goto check; ds->labels = (struct disklabel **) kalloc (sizeof (struct disklabel *) * gd->max_nr * gd->max_p); if (! ds->labels) return D_NO_MEMORY; memset ((void *) ds->labels, 0, sizeof (struct disklabel *) * gd->max_nr * gd->max_p); for (i = 1; i < gd->max_p; i++) { d->inode.i_rdev = *dev | i; if (gd->part[MINOR (d->inode.i_rdev)].nr_sects <= 0 || gd->part[MINOR (d->inode.i_rdev)].start_sect < 0) continue; linux_intr_pri = SPL5; d->file.f_flags = 0; d->file.f_mode = O_RDONLY; if (ds->fops->open && (*ds->fops->open) (&d->inode, &d->file)) continue; lp = read_bsd_label (d->inode.i_rdev); if (! lp && gd->part[MINOR (d->inode.i_rdev)].nr_sects > PDLOCATION) lp = read_vtoc (d->inode.i_rdev); if (ds->fops->release) (*ds->fops->release) (&d->inode, &d->file); if (lp) { if (ds->default_slice == 0) ds->default_slice = i; for (j = 0, p = lp->d_partitions; j < lp->d_npartitions; j++, p++) { if (p->p_offset < 0 || p->p_size <= 0) continue; /* Sanity check. */ if (p->p_size > gd->part[MINOR (d->inode.i_rdev)].nr_sects) p->p_size = gd->part[MINOR (d->inode.i_rdev)].nr_sects; } } ds->labels[MINOR (d->inode.i_rdev)] = lp; } check: if (*part >= 0 && slice == 0) slice = ds->default_slice; if (*part >= 0 && slice == 0) return D_NO_SUCH_DEVICE; *dev = MKDEV (MAJOR (*dev), MINOR (*dev) | slice); if (slice >= gd->max_p || gd->part[MINOR (*dev)].start_sect < 0 || gd->part[MINOR (*dev)].nr_sects <= 0) return D_NO_SUCH_DEVICE; if (*part >= 0) { lp = ds->labels[MINOR (*dev)]; if (! 
lp || *part >= lp->d_npartitions || lp->d_partitions[*part].p_offset < 0 || lp->d_partitions[*part].p_size <= 0) return D_NO_SUCH_DEVICE; } return 0; } #define DECL_DATA struct temp_data td #define INIT_DATA() \ { \ queue_init (&td.pages); \ td.inode.i_rdev = bd->dev; \ td.file.f_mode = bd->mode; \ td.file.f_flags = bd->flags; \ current_thread ()->pcb->data = &td; \ } static io_return_t device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, dev_mode_t mode, char *name, device_t *devp) { int part, slice, err; unsigned major, minor; kdev_t dev; ipc_port_t notify; struct block_data *bd = NULL, *bdp; struct device_struct *ds; struct gendisk *gd; struct name_map *np; DECL_DATA; np = find_name (name, &slice, &part); if (! np) return D_NO_SUCH_DEVICE; major = np->major; ds = &blkdevs[major]; /* Check that driver exists. */ if (! ds->fops) return D_NO_SUCH_DEVICE; /* Wait for any other open/close calls to finish. */ ds = &blkdevs[major]; while (ds->busy) { ds->want = 1; assert_wait ((event_t) ds, FALSE); schedule (); } ds->busy = 1; /* Compute minor number. */ if (! ds->gd) { for (gd = gendisk_head; gd && gd->major != major; gd = gd->next) ; ds->gd = gd; } minor = np->unit; gd = ds->gd; if (gd) minor <<= gd->minor_shift; dev = MKDEV (major, minor); queue_init (&td.pages); current_thread ()->pcb->data = &td; /* Check partition. */ err = init_partition (np, &dev, ds, slice, &part); if (err) goto out; /* Initialize file structure. */ switch (mode & (D_READ|D_WRITE)) { case D_WRITE: td.file.f_mode = O_WRONLY; break; case D_READ|D_WRITE: td.file.f_mode = O_RDWR; break; default: td.file.f_mode = O_RDONLY; break; } td.file.f_flags = (mode & D_NODELAY) ? O_NDELAY : 0; /* Check if the device is currently open. */ for (bdp = open_list; bdp; bdp = bdp->next) if (bdp->dev == dev && bdp->part == part && bdp->mode == td.file.f_mode && bdp->flags == td.file.f_flags) { bd = bdp; goto out; } /* Open the device. 
*/ if (ds->fops->open) { td.inode.i_rdev = dev; linux_intr_pri = SPL5; err = (*ds->fops->open) (&td.inode, &td.file); if (err) { err = linux_to_mach_error (err); goto out; } } /* Allocate and initialize device data. */ bd = (struct block_data *) kalloc (sizeof (struct block_data)); if (! bd) { err = D_NO_MEMORY; goto bad; } bd->want = 0; bd->open_count = 0; bd->iocount = 0; bd->part = part; bd->ds = ds; bd->device.emul_data = bd; bd->device.emul_ops = &linux_block_emulation_ops; bd->dev = dev; bd->mode = td.file.f_mode; bd->flags = td.file.f_flags; bd->port = ipc_port_alloc_kernel (); if (bd->port == IP_NULL) { err = KERN_RESOURCE_SHORTAGE; goto bad; } ipc_kobject_set (bd->port, (ipc_kobject_t) &bd->device, IKOT_DEVICE); notify = ipc_port_make_sonce (bd->port); ip_lock (bd->port); ipc_port_nsrequest (bd->port, 1, notify, ¬ify); assert (notify == IP_NULL); goto out; bad: if (ds->fops->release) (*ds->fops->release) (&td.inode, &td.file); out: ds->busy = 0; if (ds->want) { ds->want = 0; thread_wakeup ((event_t) ds); } if (bd && bd->open_count > 0) { if (err) *devp = NULL; else { *devp = &bd->device; bd->open_count++; } return err; } if (err) { if (bd) { if (bd->port != IP_NULL) { ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); ipc_port_dealloc_kernel (bd->port); } kfree ((vm_offset_t) bd, sizeof (struct block_data)); bd = NULL; } } else { bd->open_count = 1; bd->next = open_list; open_list = bd; } if (IP_VALID (reply_port)) ds_device_open_reply (reply_port, reply_port_type, err, dev_to_port (bd)); else if (! err) device_close (bd); return MIG_NO_REPLY; } static io_return_t device_close (void *d) { struct block_data *bd = d, *bdp, **prev; struct device_struct *ds = bd->ds; DECL_DATA; INIT_DATA (); /* Wait for any other open/close to complete. */ while (ds->busy) { ds->want = 1; assert_wait ((event_t) ds, FALSE); schedule (); } ds->busy = 1; if (--bd->open_count == 0) { /* Wait for pending I/O to complete. 
*/ while (bd->iocount > 0) { bd->want = 1; assert_wait ((event_t) bd, FALSE); schedule (); } /* Remove device from open list. */ prev = &open_list; bdp = open_list; while (bdp) { if (bdp == bd) { *prev = bdp->next; break; } prev = &bdp->next; bdp = bdp->next; } assert (bdp == bd); if (ds->fops->release) (*ds->fops->release) (&td.inode, &td.file); ipc_kobject_set (bd->port, IKO_NULL, IKOT_NONE); ipc_port_dealloc_kernel (bd->port); kfree ((vm_offset_t) bd, sizeof (struct block_data)); } ds->busy = 0; if (ds->want) { ds->want = 0; thread_wakeup ((event_t) ds); } return D_SUCCESS; } #define MAX_COPY (VM_MAP_COPY_PAGE_LIST_MAX << PAGE_SHIFT) /* Check block BN and size COUNT for I/O validity to from device BD. Set *OFF to the byte offset where I/O is to begin and return the size of transfer. */ static int check_limit (struct block_data *bd, loff_t *off, long bn, int count) { int major, minor; long maxsz, sz; struct disklabel *lp = NULL; if (count <= 0) return count; major = MAJOR (bd->dev); minor = MINOR (bd->dev); if (bd->ds->gd) { if (bd->part >= 0) { assert (bd->ds->labels); assert (bd->ds->labels[minor]); lp = bd->ds->labels[minor]; maxsz = lp->d_partitions[bd->part].p_size; } else maxsz = bd->ds->gd->part[minor].nr_sects; } else { assert (blk_size[major]); maxsz = blk_size[major][minor] << (BLOCK_SIZE_BITS - 9); } assert (maxsz > 0); sz = maxsz - bn; if (sz <= 0) return sz; if (sz < ((count + 511) >> 9)) count = sz << 9; if (lp) bn += (lp->d_partitions[bd->part].p_offset - bd->ds->gd->part[minor].start_sect); *off = (loff_t) bn << 9; bd->iocount++; return count; } static io_return_t device_write (void *d, ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, dev_mode_t mode, recnum_t bn, io_buf_ptr_t data, unsigned int orig_count, int *bytes_written) { int resid, amt, i; int count = (int) orig_count; io_return_t err = 0; vm_map_copy_t copy; vm_offset_t addr, uaddr; vm_size_t len, size; struct block_data *bd = d; DECL_DATA; INIT_DATA (); *bytes_written = 0; if 
(bd->mode == O_RDONLY) return D_INVALID_OPERATION; if (! bd->ds->fops->write) return D_READ_ONLY; count = check_limit (bd, &td.file.f_pos, bn, count); if (count < 0) return D_INVALID_SIZE; if (count == 0) { vm_map_copy_discard (copy); return 0; } resid = count; copy = (vm_map_copy_t) data; uaddr = copy->offset; /* Allocate a kernel buffer. */ size = round_page (uaddr + count) - trunc_page (uaddr); if (size > MAX_COPY) size = MAX_COPY; addr = vm_map_min (device_io_map); err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, NULL, 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); if (err) { vm_map_copy_discard (copy); goto out; } /* Determine size of I/O this time around. */ len = size - (uaddr & PAGE_MASK); if (len > resid) len = resid; while (1) { /* Map user pages. */ for (i = 0; i < copy->cpy_npages; i++) pmap_enter (vm_map_pmap (device_io_map), addr + (i << PAGE_SHIFT), copy->cpy_page_list[i]->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); /* Do the write. */ amt = (*bd->ds->fops->write) (&td.inode, &td.file, (char *) addr + (uaddr & PAGE_MASK), len); /* Unmap pages and deallocate copy. */ pmap_remove (vm_map_pmap (device_io_map), addr, addr + (copy->cpy_npages << PAGE_SHIFT)); vm_map_copy_discard (copy); /* Check result of write. */ if (amt > 0) { resid -= amt; if (resid == 0) break; uaddr += amt; } else { if (amt < 0) err = linux_to_mach_error (amt); break; } /* Determine size of I/O this time around and copy in pages. */ len = round_page (uaddr + resid) - trunc_page (uaddr); if (len > MAX_COPY) len = MAX_COPY; len -= uaddr & PAGE_MASK; if (len > resid) len = resid; err = vm_map_copyin_page_list (current_map (), uaddr, len, FALSE, FALSE, ©, FALSE); if (err) break; } /* Delete kernel buffer. 
*/ vm_map_remove (device_io_map, addr, addr + size); out: if (--bd->iocount == 0 && bd->want) { bd->want = 0; thread_wakeup ((event_t) bd); } if (IP_VALID (reply_port)) ds_device_write_reply (reply_port, reply_port_type, err, count - resid); return MIG_NO_REPLY; } static io_return_t device_read (void *d, ipc_port_t reply_port, mach_msg_type_name_t reply_port_type, dev_mode_t mode, recnum_t bn, int count, io_buf_ptr_t *data, unsigned *bytes_read) { boolean_t dirty; int resid, amt; io_return_t err = 0; queue_head_t pages; vm_map_copy_t copy; vm_offset_t addr, offset, alloc_offset, o; vm_object_t object; vm_page_t m; vm_size_t len, size; struct block_data *bd = d; DECL_DATA; INIT_DATA (); *data = 0; *bytes_read = 0; if (! bd->ds->fops->read) return D_INVALID_OPERATION; count = check_limit (bd, &td.file.f_pos, bn, count); if (count < 0) return D_INVALID_SIZE; if (count == 0) return 0; /* Allocate an object to hold the data. */ size = round_page (count); object = vm_object_allocate (size); if (! object) { err = D_NO_MEMORY; goto out; } alloc_offset = offset = 0; resid = count; /* Allocate a kernel buffer. */ addr = vm_map_min (device_io_map); if (size > MAX_COPY) size = MAX_COPY; err = vm_map_enter (device_io_map, &addr, size, 0, TRUE, NULL, 0, FALSE, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE, VM_INHERIT_NONE); if (err) goto out; queue_init (&pages); while (resid) { /* Determine size of I/O this time around. */ len = round_page (offset + resid) - trunc_page (offset); if (len > MAX_COPY) len = MAX_COPY; /* Map any pages left from previous operation. */ o = trunc_page (offset); queue_iterate (&pages, m, vm_page_t, pageq) { pmap_enter (vm_map_pmap (device_io_map), addr + o - trunc_page (offset), m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); o += PAGE_SIZE; } assert (o == alloc_offset); /* Allocate and map pages. */ while (alloc_offset < trunc_page (offset) + len) { while ((m = vm_page_grab (FALSE)) == 0) VM_PAGE_WAIT (0); assert (! m->active && ! 
m->inactive); m->busy = TRUE; queue_enter (&pages, m, vm_page_t, pageq); pmap_enter (vm_map_pmap (device_io_map), addr + alloc_offset - trunc_page (offset), m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE); alloc_offset += PAGE_SIZE; } /* Do the read. */ amt = len - (offset & PAGE_MASK); if (amt > resid) amt = resid; amt = (*bd->ds->fops->read) (&td.inode, &td.file, (char *) addr + (offset & PAGE_MASK), amt); /* Compute number of pages to insert in object. */ o = trunc_page (offset); if (amt > 0) { dirty = TRUE; resid -= amt; if (resid == 0) { /* Zero any unused space. */ if (offset + amt < o + len) memset ((void *) (addr + offset - o + amt), 0, o + len - offset - amt); offset = o + len; } else offset += amt; } else { dirty = FALSE; offset = o + len; } /* Unmap pages and add them to the object. */ pmap_remove (vm_map_pmap (device_io_map), addr, addr + len); vm_object_lock (object); while (o < trunc_page (offset)) { m = (vm_page_t) queue_first (&pages); assert (! queue_end (&pages, (queue_entry_t) m)); queue_remove (&pages, m, vm_page_t, pageq); assert (m->busy); vm_page_lock_queues (); if (dirty) { PAGE_WAKEUP_DONE (m); m->dirty = TRUE; vm_page_insert (m, object, o); } else vm_page_free (m); vm_page_unlock_queues (); o += PAGE_SIZE; } vm_object_unlock (object); if (amt <= 0) { if (amt < 0) err = linux_to_mach_error (amt); break; } } /* Delete kernel buffer. */ vm_map_remove (device_io_map, addr, addr + size); assert (queue_empty (&pages)); out: if (! err) err = vm_map_copyin_object (object, 0, round_page (count), ©); if (! 
err) { *data = (io_buf_ptr_t) copy; *bytes_read = count - resid; } else vm_object_deallocate (object); if (--bd->iocount == 0 && bd->want) { bd->want = 0; thread_wakeup ((event_t) bd); } return err; } static io_return_t device_get_status (void *d, dev_flavor_t flavor, dev_status_t status, mach_msg_type_number_t *status_count) { struct block_data *bd = d; switch (flavor) { case DEV_GET_SIZE: if (disk_major (MAJOR (bd->dev))) { assert (bd->ds->gd); if (bd->part >= 0) { struct disklabel *lp; assert (bd->ds->labels); lp = bd->ds->labels[MINOR (bd->dev)]; assert (lp); (status[DEV_GET_SIZE_DEVICE_SIZE] = lp->d_partitions[bd->part].p_size << 9); } else (status[DEV_GET_SIZE_DEVICE_SIZE] = bd->ds->gd->part[MINOR (bd->dev)].nr_sects << 9); } else { assert (blk_size[MAJOR (bd->dev)]); (status[DEV_GET_SIZE_DEVICE_SIZE] = (blk_size[MAJOR (bd->dev)][MINOR (bd->dev)] << BLOCK_SIZE_BITS)); } /* It would be nice to return the block size as reported by the driver, but a lot of user level code assumes the sector size to be 512. */ status[DEV_GET_SIZE_RECORD_SIZE] = 512; /* Always return DEV_GET_SIZE_COUNT. This is what all native Mach drivers do, and makes it possible to detect the absence of the call by setting it to a different value on input. MiG makes sure that we will never return more integers than the user asked for. 
*/ *status_count = DEV_GET_SIZE_COUNT; break; case DEV_GET_RECORDS: if (disk_major (MAJOR (bd->dev))) { assert (bd->ds->gd); if (bd->part >= 0) { struct disklabel *lp; assert (bd->ds->labels); lp = bd->ds->labels[MINOR (bd->dev)]; assert (lp); (status[DEV_GET_RECORDS_DEVICE_RECORDS] = lp->d_partitions[bd->part].p_size); } else (status[DEV_GET_RECORDS_DEVICE_RECORDS] = bd->ds->gd->part[MINOR (bd->dev)].nr_sects); } else { assert (blk_size[MAJOR (bd->dev)]); status[DEV_GET_RECORDS_DEVICE_RECORDS] = (blk_size[MAJOR (bd->dev)][MINOR (bd->dev)] << (BLOCK_SIZE_BITS - 9)); } /* It would be nice to return the block size as reported by the driver, but a lot of user level code assumes the sector size to be 512. */ status[DEV_GET_SIZE_RECORD_SIZE] = 512; /* Always return DEV_GET_RECORDS_COUNT. This is what all native Mach drivers do, and makes it possible to detect the absence of the call by setting it to a different value on input. MiG makes sure that we will never return more integers than the user asked for. */ *status_count = DEV_GET_RECORDS_COUNT; break; case V_GETPARMS: if (*status_count < (sizeof (struct disk_parms) / sizeof (int))) return D_INVALID_OPERATION; else { struct disk_parms *dp = status; struct hd_geometry hg; DECL_DATA; INIT_DATA(); if ((*bd->ds->fops->ioctl) (&td.inode, &td.file, HDIO_GETGEO, &hg)) return D_INVALID_OPERATION; dp->dp_type = DPT_WINI; /* XXX: It may be a floppy... 
*/ dp->dp_heads = hg.heads; dp->dp_cyls = hg.cylinders; dp->dp_sectors = hg.sectors; dp->dp_dosheads = hg.heads; dp->dp_doscyls = hg.cylinders; dp->dp_dossectors = hg.sectors; dp->dp_secsiz = 512; /* XXX */ dp->dp_ptag = 0; dp->dp_pflag = 0; /* XXX */ dp->dp_pstartsec = -1; dp->dp_pnumsec = -1; *status_count = sizeof (struct disk_parms) / sizeof (int); } break; default: return D_INVALID_OPERATION; } return D_SUCCESS; } struct device_emulation_ops linux_block_emulation_ops = { NULL, NULL, dev_to_port, device_open, device_close, device_write, NULL, device_read, NULL, NULL, device_get_status, NULL, NULL, NULL, NULL, NULL };