Diffstat (limited to 'pfinet.old')
-rw-r--r--  pfinet.old/ethernet.c~                       |  298
-rw-r--r--  pfinet.old/io-ops.c~                         |  616
-rw-r--r--  pfinet.old/linux-src/net/core/dev.c~         | 2092
-rw-r--r--  pfinet.old/linux-src/net/ipv4/ip_output.c~   | 1000
-rw-r--r--  pfinet.old/linux-src/net/ipv4/tcp_input.c~   | 2449
-rw-r--r--  pfinet.old/linux-src/net/ipv4/tcp_output.c~  | 1150
-rw-r--r--  pfinet.old/pfinet.patch~                     |   31
-rw-r--r--  pfinet.old/sched.c~                          |   81
-rw-r--r--  pfinet.old/socket-ops.c~                     |  546
-rw-r--r--  pfinet.old/tmp.patch~                        |  146
-rw-r--r--  pfinet.old/tunnel.c~                         |  636
11 files changed, 0 insertions(+), 9045 deletions(-)
diff --git a/pfinet.old/ethernet.c~ b/pfinet.old/ethernet.c~
deleted file mode 100644
index 24adcc87..00000000
--- a/pfinet.old/ethernet.c~
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- Copyright (C) 1995, 1996, 1998, 1999, 2000, 2002, 2007
- Free Software Foundation, Inc.
-
- Written by Michael I. Bushnell, p/BSG.
-
- This file is part of the GNU Hurd.
-
- The GNU Hurd is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2, or (at
- your option) any later version.
-
- The GNU Hurd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-
-#include "pfinet.h"
-
-#include <device/device.h>
-#include <device/net_status.h>
-#include <netinet/in.h>
-#include <string.h>
-#define _HACK_ERRNO_H
-#include <errno.h>
-#include <error.h>
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <device/bpf.h>
-
-
-struct port_class *etherreadclass;
-
-struct ether_device
-{
- struct ether_device *next;
- device_t ether_port;
- struct port_info *readpt;
- mach_port_t readptname;
- struct device dev;
-};
-
-/* Linked list of all ethernet devices. */
-struct ether_device *ether_dev;
-
-struct enet_statistics retbuf;
-
-
-/* Mach doesn't provide this. DAMN. */
-struct enet_statistics *
-ethernet_get_stats (struct device *dev)
-{
- return &retbuf;
-}
-
-int
-ethernet_stop (struct device *dev)
-{
- return 0;
-}
-
-void
-ethernet_set_multi (struct device *dev)
-{
-}
-
-/* The BPF instruction allows IP and ARP packets */
-static struct bpf_insn ether_filter[] =
-{
- {NETF_IN|NETF_BPF, /* Header. */ 0, 0, 0},
- {40, 0, 0, 12},
- {21, 1, 0, 2054},
- {21, 0, 1, 2048},
- {6, 0, 0, 1500},
- {6, 0, 0, 0}
-};
-static int ether_filter_len = sizeof (ether_filter) / sizeof (short);
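For reference, the numeric opcodes in ether_filter decode to the classic BPF
mnemonics shown below: 2054 is 0x0806 (ARP) and 2048 is 0x0800 (IP), tested
against the ethertype field at offset 12 of the frame.  A sketch only,
assuming the BPF_STMT/BPF_JUMP macros of a BSD-style <net/bpf.h>:

    static struct bpf_insn ether_filter_symbolic[] =
    {
      {NETF_IN | NETF_BPF, 0, 0, 0},                       /* Mach filter header */
      BPF_STMT (BPF_LD | BPF_H | BPF_ABS, 12),             /* A <- ethertype */
      BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K, 0x0806, 1, 0),  /* ARP -> accept */
      BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 1),  /* IP -> accept, else reject */
      BPF_STMT (BPF_RET | BPF_K, 1500),                    /* accept up to 1500 bytes */
      BPF_STMT (BPF_RET | BPF_K, 0),                       /* reject */
    };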
-
-static struct port_bucket *etherport_bucket;
-
-
-static any_t
-ethernet_thread (any_t arg)
-{
- ports_manage_port_operations_one_thread (etherport_bucket,
- ethernet_demuxer,
- 0);
- return 0;
-}
-
-int
-ethernet_demuxer (mach_msg_header_t *inp,
- mach_msg_header_t *outp)
-{
- static int count = 0;
- struct net_rcv_msg *msg = (struct net_rcv_msg *) inp;
- struct sk_buff *skb;
- int datalen;
- struct ether_device *edev;
- struct device *dev = 0;
-
- if (inp->msgh_id != NET_RCV_MSG_ID)
- return 0;
-
- for (edev = ether_dev; edev; edev = edev->next)
- if (inp->msgh_local_port == edev->readptname)
- dev = &edev->dev;
-
- if (! dev)
- {
- if (inp->msgh_remote_port != MACH_PORT_NULL)
- mach_port_deallocate (mach_task_self (), inp->msgh_remote_port);
- return 1;
- }
-
- fprintf (stderr, "pfinet receives packet %d.\n", ++count);
- fflush (stderr);
-
- datalen = ETH_HLEN
- + msg->packet_type.msgt_number - sizeof (struct packet_header);
-
- __mutex_lock (&net_bh_lock);
- skb = alloc_skb (datalen, GFP_ATOMIC);
- skb_put (skb, datalen);
- skb->dev = dev;
-
- char *str = "pfinet enters net_bh_lock.\n";
- write (fileno (stderr), str, strlen (str) + 1);
- fflush (stderr);
-
- /* Copy the two parts of the frame into the buffer. */
- bcopy (msg->header, skb->data, ETH_HLEN);
- bcopy (msg->packet + sizeof (struct packet_header),
- skb->data + ETH_HLEN,
- datalen - ETH_HLEN);
-
- /* Drop it on the queue. */
- skb->protocol = eth_type_trans (skb, dev);
- netif_rx (skb);
- __mutex_unlock (&net_bh_lock);
-
- fprintf (stderr, "pfinet delivered the packet.\n");
- fflush (stderr);
-
- return 1;
-}
-
-
-void
-ethernet_initialize (void)
-{
- etherport_bucket = ports_create_bucket ();
- etherreadclass = ports_create_class (0, 0);
-
- cthread_detach (cthread_fork (ethernet_thread, 0));
-}
-
-int
-ethernet_open (struct device *dev)
-{
- error_t err;
- device_t master_device;
- struct ether_device *edev = (struct ether_device *) dev->priv;
-
- assert (edev->ether_port == MACH_PORT_NULL);
-
- err = ports_create_port (etherreadclass, etherport_bucket,
- sizeof (struct port_info), &edev->readpt);
- assert_perror (err);
- edev->readptname = ports_get_right (edev->readpt);
- mach_port_insert_right (mach_task_self (), edev->readptname, edev->readptname,
- MACH_MSG_TYPE_MAKE_SEND);
-
- mach_port_set_qlimit (mach_task_self (), edev->readptname, MACH_PORT_QLIMIT_MAX);
-
- /* The device name here is the path of a device file. */
- master_device = file_name_lookup (dev->name, 0, 0);
- if (master_device == MACH_PORT_NULL)
- error (2, errno, "file_name_lookup %s", dev->name);
-
- err = device_open (master_device, D_WRITE | D_READ, "eth", &edev->ether_port);
- mach_port_deallocate (mach_task_self (), master_device);
- if (err)
- error (2, err, "%s", dev->name);
-
- err = device_set_filter (edev->ether_port, ports_get_right (edev->readpt),
- MACH_MSG_TYPE_MAKE_SEND, 0,
- ether_filter, ether_filter_len);
- if (err)
- error (2, err, "%s", dev->name);
- return 0;
-}
-
-
-/* Transmit an ethernet frame */
-int
-ethernet_xmit (struct sk_buff *skb, struct device *dev)
-{
- error_t err;
- struct ether_device *edev = (struct ether_device *) dev->priv;
- u_int count;
- char *str1 = "pfinet: ethernet_xmit check point 1.\n";
- char *str2 = "pfinet: ethernet_xmit check point 2.\n";
- int stderr_fd = fileno (stderr);
-
- write (stderr_fd, str1, strlen (str1) + 1);
- fflush (stderr);
- err = device_write (edev->ether_port, D_NOWAIT, 0, skb->data, skb->len, &count);
- write (stderr_fd, str2, strlen (str2) + 1);
- fflush (stderr);
- assert_perror (err);
- assert (count == skb->len);
- dev_kfree_skb (skb);
- return 0;
-}
-
-void
-setup_ethernet_device (char *name, struct device **device)
-{
- struct net_status netstat;
- size_t count;
- int net_address[2];
- error_t err;
- struct ether_device *edev;
- struct device *dev;
-
- edev = calloc (1, sizeof (struct ether_device));
- if (!edev)
- error (2, ENOMEM, "%s", name);
- edev->next = ether_dev;
- ether_dev = edev;
-
- *device = dev = &edev->dev;
-
- dev->name = strdup (name);
- /* Functions. These ones are the true "hardware layer" in Linux. */
- dev->open = 0; /* We set up before calling dev_open. */
- dev->stop = ethernet_stop;
- dev->hard_start_xmit = ethernet_xmit;
- dev->get_stats = ethernet_get_stats;
- dev->set_multicast_list = ethernet_set_multi;
-
- /* These are the ones set by drivers/net/net_init.c::ether_setup. */
- dev->hard_header = eth_header;
- dev->rebuild_header = eth_rebuild_header;
- dev->hard_header_cache = eth_header_cache;
- dev->header_cache_update = eth_header_cache_update;
- dev->hard_header_parse = eth_header_parse;
- /* We can't do these two (and we never try anyway). */
- /* dev->change_mtu = eth_change_mtu; */
- /* dev->set_mac_address = eth_mac_addr; */
-
- /* Some more fields */
- dev->priv = edev; /* For reverse lookup. */
- dev->type = ARPHRD_ETHER;
- dev->hard_header_len = ETH_HLEN;
- dev->addr_len = ETH_ALEN;
- memset (dev->broadcast, 0xff, ETH_ALEN);
- dev->flags = IFF_BROADCAST | IFF_MULTICAST;
- dev_init_buffers (dev);
-
- ethernet_open (dev);
-
- /* Fetch hardware information */
- count = NET_STATUS_COUNT;
- err = device_get_status (edev->ether_port, NET_STATUS,
- (dev_status_t) &netstat, &count);
- if (err)
- error (2, err, "%s: Cannot get device status", name);
- dev->mtu = netstat.max_packet_size - dev->hard_header_len;
- assert (netstat.header_format == HDR_ETHERNET);
- assert (netstat.header_size == ETH_HLEN);
- assert (netstat.address_size == ETH_ALEN);
-
- count = 2;
- assert (count * sizeof (int) >= ETH_ALEN);
- err = device_get_status (edev->ether_port, NET_ADDRESS, net_address, &count);
- if (err)
- error (2, err, "%s: Cannot get hardware Ethernet address", name);
- net_address[0] = ntohl (net_address[0]);
- net_address[1] = ntohl (net_address[1]);
- bcopy (net_address, dev->dev_addr, ETH_ALEN);
-
- /* That should be enough. */
-
- /* This call adds the device to the `dev_base' chain,
- initializes its `ifindex' member (which matters!),
- and tells the protocol stacks about the device. */
- err = - register_netdevice (dev);
- assert_perror (err);
-}
diff --git a/pfinet.old/io-ops.c~ b/pfinet.old/io-ops.c~
deleted file mode 100644
index 4653097c..00000000
--- a/pfinet.old/io-ops.c~
+++ /dev/null
@@ -1,616 +0,0 @@
-/*
- Copyright (C) 1995,96,97,98,99,2000,02 Free Software Foundation, Inc.
- Written by Michael I. Bushnell, p/BSG.
-
- This file is part of the GNU Hurd.
-
- The GNU Hurd is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2, or (at
- your option) any later version.
-
- The GNU Hurd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-
-#include "pfinet.h"
-
-#include <linux/wait.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <net/sock.h>
-
-#include "io_S.h"
-#include <netinet/in.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <mach/notify.h>
-#include <sys/mman.h>
-
-error_t
-S_io_write (struct sock_user *user,
- char *data,
- size_t datalen,
- off_t offset,
- mach_msg_type_number_t *amount)
-{
- error_t err;
- struct iovec iov = { data, datalen };
- struct msghdr m = { msg_name: 0, msg_namelen: 0, msg_flags: 0,
- msg_controllen: 0, msg_iov: &iov, msg_iovlen: 1 };
- char *str1 = "pfinet io_write check point 1.\n";
- int stderr_fd = fileno (stderr);
-
- if (!user)
- return EOPNOTSUPP;
-
- fprintf (stderr, "pfinet io_write before locking global_lock.\n");
- fflush (stderr);
-
- __mutex_lock (&global_lock);
- become_task (user);
- if (user->sock->flags & O_NONBLOCK)
- m.msg_flags |= MSG_DONTWAIT;
- err = (*user->sock->ops->sendmsg) (user->sock, &m, datalen, 0);
- __mutex_unlock (&global_lock);
-
- fprintf (stderr, "pfinet io_write after unlocking global_lock.\n");
- fflush (stderr);
-
- if (err < 0)
- err = -err;
- else
- {
- *amount = err;
- err = 0;
- }
-
- return err;
-}
-
-error_t
-S_io_read (struct sock_user *user,
- char **data,
- size_t *datalen,
- off_t offset,
- mach_msg_type_number_t amount)
-{
- error_t err;
- int alloced = 0;
- struct iovec iov;
- struct msghdr m = { msg_name: 0, msg_namelen: 0, msg_flags: 0,
- msg_controllen: 0, msg_iov: &iov, msg_iovlen: 1 };
-
- if (!user)
- return EOPNOTSUPP;
-
- /* Instead of this, we should peek at the socket and only
- allocate as much as necessary. */
- if (amount > *datalen)
- {
- *data = mmap (0, amount, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
- alloced = 1;
- }
-
- iov.iov_base = *data;
- iov.iov_len = amount;
-
- __mutex_lock (&global_lock);
- become_task (user);
- err = (*user->sock->ops->recvmsg) (user->sock, &m, amount,
- ((user->sock->flags & O_NONBLOCK)
- ? MSG_DONTWAIT : 0),
- 0);
- __mutex_unlock (&global_lock);
-
- if (err < 0)
- err = -err;
- else
- {
- *datalen = err;
- if (alloced && round_page (*datalen) < round_page (amount))
- munmap (*data + round_page (*datalen),
- round_page (amount) - round_page (*datalen));
- err = 0;
- }
- return err;
-}
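A minimal sketch of the peek that the comment in S_io_read asks for (not in
the original; datagram case only, and a real version must run under
global_lock, just as the skb_peek in S_io_readable below does):

    /* Shrink AMOUNT to the pending packet's size before allocating. */
    struct sk_buff *head = skb_peek (&user->sock->sk->receive_queue);
    if (head && head->len < amount)
      amount = head->len;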
-
-error_t
-S_io_seek (struct sock_user *user,
- off_t offset,
- int whence,
- off_t *newp)
-{
- return user ? ESPIPE : EOPNOTSUPP;
-}
-
-error_t
-S_io_readable (struct sock_user *user,
- mach_msg_type_number_t *amount)
-{
- struct sock *sk;
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
-
- /* We need to avoid calling the Linux ioctl routines,
- so here is a rather ugly break of modularity. */
-
- sk = user->sock->sk;
- err = 0;
-
- /* Linux's af_inet.c ioctl routine just calls the protocol-specific
- ioctl routine; it's those routines that we need to simulate. So
- this switch corresponds to the initialization of SK->prot in
- af_inet.c:inet_create. */
- switch (user->sock->type)
- {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- err = tcp_tiocinq (sk, amount);
- break;
-
- case SOCK_DGRAM:
- /* These guts are copied from udp.c:udp_ioctl (TIOCINQ). */
- if (sk->state == TCP_LISTEN)
- err = EINVAL;
- else
- /* Boy, I really love the C language. */
- *amount = (skb_peek (&sk->receive_queue)
- ? : &((struct sk_buff){}))->len;
- break;
-
- case SOCK_RAW:
- default:
- err = EOPNOTSUPP;
- break;
- }
-
- __mutex_unlock (&global_lock);
- return err;
-}
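The SOCK_DGRAM arm above leans on GNU C's `x ? : y' extension plus a
compound literal; unrolled, it is equivalent to:

    /* Report the length of the packet at the head of the receive queue,
       or 0 if the queue is empty. */
    struct sk_buff *head = skb_peek (&sk->receive_queue);
    *amount = head ? head->len : 0;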
-
-error_t
-S_io_set_all_openmodes (struct sock_user *user,
- int bits)
-{
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- if (bits & O_NONBLOCK)
- user->sock->flags |= O_NONBLOCK;
- else
- user->sock->flags &= ~O_NONBLOCK;
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_get_openmodes (struct sock_user *user,
- int *bits)
-{
- struct sock *sk;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- sk = user->sock->sk;
-
- *bits = 0;
- if (!(sk->shutdown & SEND_SHUTDOWN))
- *bits |= O_WRITE;
- if (!(sk->shutdown & RCV_SHUTDOWN))
- *bits |= O_READ;
- if (user->sock->flags & O_NONBLOCK)
- *bits |= O_NONBLOCK;
-
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_set_some_openmodes (struct sock_user *user,
- int bits)
-{
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- if (bits & O_NONBLOCK)
- user->sock->flags |= O_NONBLOCK;
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_clear_some_openmodes (struct sock_user *user,
- int bits)
-{
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- if (bits & O_NONBLOCK)
- user->sock->flags &= ~O_NONBLOCK;
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_select (struct sock_user *user,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- int *select_type)
-{
- const int want = *select_type;
- int avail;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
-
- /* In Linux, this means (supposedly) that I/O will never be possible.
- That's a lose, so prevent it from happening. */
- assert (user->sock->ops->poll);
-
- avail = (*user->sock->ops->poll) ((void *) 0xdeadbeef,
- user->sock,
- (void *) 0xdeadbead);
- if ((avail & want) == 0)
- {
- ports_interrupt_self_on_notification (user, reply,
- MACH_NOTIFY_DEAD_NAME);
-
- do
- {
- /* Block until we are woken or cancelled. */
- interruptible_sleep_on (user->sock->sk->sleep);
- if (signal_pending (current)) /* This means we were cancelled. */
- {
- __mutex_unlock (&global_lock);
- return EINTR;
- }
- avail = (*user->sock->ops->poll) ((void *) 0xdeadbeef,
- user->sock,
- (void *) 0xdeadbead);
- }
- while ((avail & want) == 0);
- }
-
- /* We got something. */
- *select_type = avail;
-
- __mutex_unlock (&global_lock);
-
- return 0;
-}
-
-error_t
-S_io_stat (struct sock_user *user,
- struct stat *st)
-{
- if (!user)
- return EOPNOTSUPP;
-
- bzero (st, sizeof (struct stat));
-
- st->st_fstype = FSTYPE_SOCKET;
- st->st_fsid = getpid ();
- st->st_ino = user->sock->st_ino;
-
- st->st_mode = S_IFSOCK | ACCESSPERMS;
- st->st_blksize = 512; /* ???? */
-
- return 0;
-}
-
-error_t
-S_io_reauthenticate (struct sock_user *user,
- mach_port_t rend)
-{
- struct sock_user *newuser;
- uid_t gubuf[20], ggbuf[20], aubuf[20], agbuf[20];
- uid_t *gen_uids, *gen_gids, *aux_uids, *aux_gids;
- size_t genuidlen, gengidlen, auxuidlen, auxgidlen;
- error_t err;
- size_t i, j;
- auth_t auth;
- mach_port_t newright;
-
- if (!user)
- return EOPNOTSUPP;
-
- genuidlen = gengidlen = auxuidlen = auxgidlen = 20;
- gen_uids = gubuf;
- gen_gids = ggbuf;
- aux_uids = aubuf;
- aux_gids = agbuf;
-
- __mutex_lock (&global_lock);
- newuser = make_sock_user (user->sock, 0, 1, 0);
-
- auth = getauth ();
- newright = ports_get_send_right (newuser);
- assert (newright != MACH_PORT_NULL);
- do
- err = auth_server_authenticate (auth,
- rend,
- MACH_MSG_TYPE_COPY_SEND,
- newright,
- MACH_MSG_TYPE_COPY_SEND,
- &gen_uids, &genuidlen,
- &aux_uids, &auxuidlen,
- &gen_gids, &gengidlen,
- &aux_gids, &auxgidlen);
- while (err == EINTR);
- mach_port_deallocate (mach_task_self (), rend);
- mach_port_deallocate (mach_task_self (), newright);
- mach_port_deallocate (mach_task_self (), auth);
-
- if (err)
- newuser->isroot = 0;
- else
- /* Check permission as fshelp_isowner would do. */
- for (i = 0; i < genuidlen; i++)
- {
- if (gen_uids[i] == 0 || gen_uids[i] == pfinet_owner)
- newuser->isroot = 1;
- if (gen_uids[i] == pfinet_group)
- for (j = 0; j < gengidlen; j++)
- if (gen_gids[j] == pfinet_group)
- newuser->isroot = 1;
- }
-
- mach_port_move_member (mach_task_self (), newuser->pi.port_right,
- pfinet_bucket->portset);
-
- __mutex_unlock (&global_lock);
-
- ports_port_deref (newuser);
-
- if (gubuf != gen_uids)
- munmap (gen_uids, genuidlen * sizeof (uid_t));
- if (ggbuf != gen_gids)
- munmap (gen_gids, gengidlen * sizeof (uid_t));
- if (aubuf != aux_uids)
- munmap (aux_uids, auxuidlen * sizeof (uid_t));
- if (agbuf != aux_gids)
- munmap (aux_gids, auxgidlen * sizeof (uid_t));
-
- return 0;
-}
-
-error_t
-S_io_restrict_auth (struct sock_user *user,
- mach_port_t *newobject,
- mach_msg_type_name_t *newobject_type,
- uid_t *uids, size_t uidslen,
- uid_t *gids, size_t gidslen)
-{
- struct sock_user *newuser;
- int i, j;
- int isroot;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
-
- isroot = 0;
- if (user->isroot)
- /* Check permission as fshelp_isowner would do. */
- for (i = 0; i < uidslen; i++)
- {
- if (uids[i] == 0 || uids[i] == pfinet_owner)
- isroot = 1;
- if (uids[i] == pfinet_group)
- for (j = 0; j < gidslen; j++)
- if (gids[j] == pfinet_group)
- isroot = 1;
- }
-
- newuser = make_sock_user (user->sock, isroot, 0, 0);
- *newobject = ports_get_right (newuser);
- *newobject_type = MACH_MSG_TYPE_MAKE_SEND;
- ports_port_deref (newuser);
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_duplicate (struct sock_user *user,
- mach_port_t *newobject,
- mach_msg_type_name_t *newobject_type)
-{
- struct sock_user *newuser;
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- newuser = make_sock_user (user->sock, user->isroot, 0, 0);
- *newobject = ports_get_right (newuser);
- *newobject_type = MACH_MSG_TYPE_MAKE_SEND;
- ports_port_deref (newuser);
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_identity (struct sock_user *user,
- mach_port_t *id,
- mach_msg_type_name_t *idtype,
- mach_port_t *fsys,
- mach_msg_type_name_t *fsystype,
- ino_t *fileno)
-{
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- if (user->sock->identity == MACH_PORT_NULL)
- {
- err = mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE,
- &user->sock->identity);
- if (err)
- {
- __mutex_unlock (&global_lock);
- return err;
- }
- }
-
- *id = user->sock->identity;
- *idtype = MACH_MSG_TYPE_MAKE_SEND;
- *fsys = fsys_identity;
- *fsystype = MACH_MSG_TYPE_MAKE_SEND;
- *fileno = user->sock->st_ino;
-
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_io_revoke (struct sock_user *user)
-{
- /* XXX maybe we should try */
- return EOPNOTSUPP;
-}
-
-
-
-error_t
-S_io_async (struct sock_user *user,
- mach_port_t notify,
- mach_port_t *id,
- mach_msg_type_name_t *idtype)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_mod_owner (struct sock_user *user,
- pid_t owner)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_get_owner (struct sock_user *user,
- pid_t *owner)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_get_icky_async_id (struct sock_user *user,
- mach_port_t *id,
- mach_msg_type_name_t *idtype)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_server_version (struct sock_user *user,
- char *name,
- int *major,
- int *minor,
- int *edit)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_pathconf (struct sock_user *user,
- int name,
- int *value)
-{
- return EOPNOTSUPP;
-}
-
-
-
-error_t
-S_io_map (struct sock_user *user,
- mach_port_t *rdobj,
- mach_msg_type_name_t *rdobj_type,
- mach_port_t *wrobj,
- mach_msg_type_name_t *wrobj_type)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_map_cntl (struct sock_user *user,
- mach_port_t *obj,
- mach_msg_type_name_t *obj_type)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_get_conch (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_release_conch (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_eofnotify (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_prenotify (struct sock_user *user,
- vm_offset_t start,
- vm_offset_t end)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_postnotify (struct sock_user *user,
- vm_offset_t start,
- vm_offset_t end)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_readnotify (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_readsleep (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_io_sigio (struct sock_user *user)
-{
- return EOPNOTSUPP;
-}
diff --git a/pfinet.old/linux-src/net/core/dev.c~ b/pfinet.old/linux-src/net/core/dev.c~
deleted file mode 100644
index 7d0658c5..00000000
--- a/pfinet.old/linux-src/net/core/dev.c~
+++ /dev/null
@@ -1,2092 +0,0 @@
-/*
- * NET3 Protocol independent device support routines.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Derived from the non IP parts of dev.c 1.0.19
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Mark Evans, <evansmp@uhura.aston.ac.uk>
- *
- * Additional Authors:
- * Florian la Roche <rzsfl@rz.uni-sb.de>
- * Alan Cox <gw4pts@gw4pts.ampr.org>
- * David Hinds <dhinds@allegro.stanford.edu>
- * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
- * Adam Sulmicki <adam@cfar.umd.edu>
- *
- * Changes:
- * Marcelo Tosatti <marcelo@conectiva.com.br> : dont accept mtu 0 or <
- * Alan Cox : device private ioctl copies fields back.
- * Alan Cox : Transmit queue code does relevant stunts to
- * keep the queue safe.
- * Alan Cox : Fixed double lock.
- * Alan Cox : Fixed promisc NULL pointer trap
- * ???????? : Support the full private ioctl range
- * Alan Cox : Moved ioctl permission check into drivers
- * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
- * Alan Cox : 100 backlog just doesn't cut it when
- * you start doing multicast video 8)
- * Alan Cox : Rewrote net_bh and list manager.
- * Alan Cox : Fix ETH_P_ALL echoback lengths.
- * Alan Cox : Took out transmit every packet pass
- * Saved a few bytes in the ioctl handler
- * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
- * a function call a packet.
- * Alan Cox : Hashed net_bh()
- * Richard Kooijman: Timestamp fixes.
- * Alan Cox : Wrong field in SIOCGIFDSTADDR
- * Alan Cox : Device lock protection.
- * Alan Cox : Fixed nasty side effect of device close changes.
- * Rudi Cilibrasi : Pass the right thing to set_mac_address()
- * Dave Miller : 32bit quantity for the device lock to make it work out
- * on a Sparc.
- * Bjorn Ekwall : Added KERNELD hack.
- * Alan Cox : Cleaned up the backlog initialise.
- * Craig Metz : SIOCGIFCONF fix if space for under
- * 1 device.
- * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
- * is no device open function.
- * Andi Kleen : Fix error reporting for SIOCGIFCONF
- * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
- * Cyrus Durgin : Cleaned for KMOD
- * Adam Sulmicki : Bug Fix : Network Device Unload
- * A network device unload needs to purge
- * the backlog queue.
- * Paul Rusty Russel : SIOCSIFNAME
- * Andrea Arcangeli : dev_clear_backlog() needs the
- * skb_queue_lock held.
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/rtnetlink.h>
-#include <net/slhc.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <net/br.h>
-#include <net/dst.h>
-#include <net/pkt_sched.h>
-#include <net/profile.h>
-#include <linux/init.h>
-#include <linux/kmod.h>
-#ifdef CONFIG_NET_RADIO
-#include <linux/wireless.h>
-#endif /* CONFIG_NET_RADIO */
-#ifdef CONFIG_PLIP
-extern int plip_init(void);
-#endif
-
-NET_PROFILE_DEFINE(dev_queue_xmit)
-NET_PROFILE_DEFINE(net_bh)
-NET_PROFILE_DEFINE(net_bh_skb)
-
-
-const char *if_port_text[] = {
- "unknown",
- "BNC",
- "10baseT",
- "AUI",
- "100baseT",
- "100baseTX",
- "100baseFX"
-};
-
-/*
- * The list of packet types we will receive (as opposed to discard)
- * and the routines to invoke.
- *
- * Why 16. Because with 16 the only overlap we get on a hash of the
- * low nibble of the protocol value is RARP/SNAP/X.25.
- *
- * 0800 IP
- * 0001 802.3
- * 0002 AX.25
- * 0004 802.2
- * 8035 RARP
- * 0005 SNAP
- * 0805 X.25
- * 0806 ARP
- * 8137 IPX
- * 0009 Localtalk
- * 86DD IPv6
- */
-
-struct packet_type *ptype_base[16]; /* 16 way hashed list */
-struct packet_type *ptype_all = NULL; /* Taps */
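Concretely, the bucket index is the low nibble of the host-order protocol
value; dev_add_pack and dev_remove_pack below inline the expression.  A
hypothetical helper makes the overlap noted above visible: 0x8035 (RARP),
0x0005 (SNAP) and 0x0805 (X.25) all map to bucket 5.

    /* Hypothetical helper; equivalent to the inlined ntohs(type)&15. */
    static inline int ptype_hash (unsigned short type)  /* network order */
    {
      return ntohs (type) & 15;                         /* 16 buckets */
    }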
-
-/*
- * Device list lock.  Holding it ensures that an interface
- * will not disappear unexpectedly while the kernel sleeps.
- */
-
-atomic_t dev_lockct = ATOMIC_INIT(0);
-
-/*
- * Our notifier list
- */
-
-#ifdef _HURD_
-struct notifier_block *netdev_chain=NULL;
-#else
-static struct notifier_block *netdev_chain=NULL;
-#endif
-
-/*
- * Device drivers call our routines to queue packets here. We empty the
- * queue in the bottom half handler.
- */
-
-static struct sk_buff_head backlog;
-
-#ifdef CONFIG_NET_FASTROUTE
-int netdev_fastroute;
-int netdev_fastroute_obstacles;
-struct net_fastroute_stats dev_fastroute_stat;
-#endif
-
-static void dev_clear_backlog(struct device *dev);
-
-
-/******************************************************************************************
-
- Protocol management and registration routines
-
-*******************************************************************************************/
-
-/*
- * For efficiency
- */
-
-int netdev_nit=0;
-
-/*
- * Add a protocol ID to the list. Now that the input handler is
- * smarter we can dispense with all the messy stuff that used to be
- * here.
- *
- * BEWARE!!!  Protocol handlers that mangle input packets
- * MUST BE last in the hash buckets, and handler checking
- * MUST start from the promiscuous ptype_all chain in net_bh.
- * It is true now; do not change it.
- * Explanation follows: if a protocol handler that mangles packets
- * were first in the list, it could not sense that the packet is
- * cloned and should be copied-on-write, so it would change it and
- * subsequent readers would get a broken packet.
- * --ANK (980803)
- */
-
-void dev_add_pack(struct packet_type *pt)
-{
- int hash;
-#ifdef CONFIG_NET_FASTROUTE
- /* Hack to detect packet socket */
- if (pt->data) {
- netdev_fastroute_obstacles++;
- dev_clear_fastroute(pt->dev);
- }
-#endif
- if(pt->type==htons(ETH_P_ALL))
- {
- netdev_nit++;
- pt->next=ptype_all;
- ptype_all=pt;
- }
- else
- {
- hash=ntohs(pt->type)&15;
- pt->next = ptype_base[hash];
- ptype_base[hash] = pt;
- }
-}
-
-
-/*
- * Remove a protocol ID from the list.
- */
-
-void dev_remove_pack(struct packet_type *pt)
-{
- struct packet_type **pt1;
- if(pt->type==htons(ETH_P_ALL))
- {
- netdev_nit--;
- pt1=&ptype_all;
- }
- else
- pt1=&ptype_base[ntohs(pt->type)&15];
- for(; (*pt1)!=NULL; pt1=&((*pt1)->next))
- {
- if(pt==(*pt1))
- {
- *pt1=pt->next;
- synchronize_bh();
-#ifdef CONFIG_NET_FASTROUTE
- if (pt->data)
- netdev_fastroute_obstacles--;
-#endif
- return;
- }
- }
- printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
-}
-
-/*****************************************************************************************
-
- Device Interface Subroutines
-
-******************************************************************************************/
-
-/*
- * Find an interface by name.
- */
-
-struct device *dev_get(const char *name)
-{
- struct device *dev;
-
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
- if (strcmp(dev->name, name) == 0)
- return(dev);
- }
- return NULL;
-}
-
-struct device * dev_get_by_index(int ifindex)
-{
- struct device *dev;
-
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
- if (dev->ifindex == ifindex)
- return(dev);
- }
- return NULL;
-}
-
-struct device *dev_getbyhwaddr(unsigned short type, char *ha)
-{
- struct device *dev;
-
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
- if (dev->type == type &&
- memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
- return(dev);
- }
- return(NULL);
-}
-
-/*
- * Passed a format string - eg "lt%d" it will try and find a suitable
- * id. Not efficient for many devices, not called a lot..
- */
-
-int dev_alloc_name(struct device *dev, const char *name)
-{
- int i;
- /*
- * If you need over 100 please also fix the algorithm...
- */
- for(i=0;i<100;i++)
- {
- sprintf(dev->name,name,i);
- if(dev_get(dev->name)==NULL)
- return i;
- }
- return -ENFILE; /* Over 100 of the things .. bail out! */
-}
-
-struct device *dev_alloc(const char *name, int *err)
-{
- struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL);
- if(dev==NULL)
- {
- *err=-ENOBUFS;
- return NULL;
- }
- dev->name=(char *)(dev+1); /* Name string space */
- *err=dev_alloc_name(dev,name);
- if(*err<0)
- {
- kfree(dev);
- return NULL;
- }
- return dev;
-}
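An illustrative call (not from this file): a driver wanting the first free
ethN slot would write:

    int err;
    struct device *dev = dev_alloc ("eth%d", &err);  /* yields eth0, eth1, ... */
    if (dev == NULL)
      return err;  /* -ENOBUFS, or -ENFILE if eth0..eth99 are all taken */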
-
-void netdev_state_change(struct device *dev)
-{
- if (dev->flags&IFF_UP)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
-}
-
-
-/*
- * Find and possibly load an interface.
- */
-
-#ifdef CONFIG_KMOD
-
-void dev_load(const char *name)
-{
- if(!dev_get(name) && capable(CAP_SYS_MODULE))
- request_module(name);
-}
-
-#else
-
-extern inline void dev_load(const char *unused){;}
-
-#endif
-
-static int default_rebuild_header(struct sk_buff *skb)
-{
- printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
- kfree_skb(skb);
- return 1;
-}
-
-/*
- * Prepare an interface for use.
- */
-
-int dev_open(struct device *dev)
-{
- int ret = 0;
-
- /*
- * Is it already up?
- */
-
- if (dev->flags&IFF_UP)
- return 0;
-
- /*
- * Call device private open method
- */
-
- if (dev->open)
- ret = dev->open(dev);
-
- /*
- * If it went open OK then:
- */
-
- if (ret == 0)
- {
- /*
- * nil rebuild_header routine,
- * that should be never called and used as just bug trap.
- */
-
- if (dev->rebuild_header == NULL)
- dev->rebuild_header = default_rebuild_header;
-
- /*
- * Set the flags.
- */
- dev->flags |= (IFF_UP | IFF_RUNNING);
-
- /*
- * Initialize multicasting status
- */
- dev_mc_upload(dev);
-
- /*
- * Wakeup transmit queue engine
- */
- dev_activate(dev);
-
- /*
- * ... and announce new interface.
- */
- notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
-
- }
- return(ret);
-}
-
-#ifdef CONFIG_NET_FASTROUTE
-
-static __inline__ void dev_do_clear_fastroute(struct device *dev)
-{
- if (dev->accept_fastpath) {
- int i;
-
- for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
- dst_release_irqwait(xchg(dev->fastpath+i, NULL));
- }
-}
-
-void dev_clear_fastroute(struct device *dev)
-{
- if (dev) {
- dev_do_clear_fastroute(dev);
- } else {
- for (dev = dev_base; dev; dev = dev->next)
- dev_do_clear_fastroute(dev);
- }
-}
-#endif
-
-/*
- * Completely shutdown an interface.
- */
-
-int dev_close(struct device *dev)
-{
- if (!(dev->flags&IFF_UP))
- return 0;
-
- dev_deactivate(dev);
-
- dev_lock_wait();
-
- /*
- * Call the device specific close. This cannot fail.
- * Only if device is UP
- */
-
- if (dev->stop)
- dev->stop(dev);
-
- if (dev->start)
- printk("dev_close: bug %s still running\n", dev->name);
-
- /*
- * Device is now down.
- */
- dev_clear_backlog(dev);
-
- dev->flags&=~(IFF_UP|IFF_RUNNING);
-#ifdef CONFIG_NET_FASTROUTE
- dev_clear_fastroute(dev);
-#endif
-
- /*
- * Tell people we are going down
- */
- notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
-
- return(0);
-}
-
-
-/*
- * Device change register/unregister. These are not inline or static
- * as we export them to the world.
- */
-
-int register_netdevice_notifier(struct notifier_block *nb)
-{
- return notifier_chain_register(&netdev_chain, nb);
-}
-
-int unregister_netdevice_notifier(struct notifier_block *nb)
-{
- return notifier_chain_unregister(&netdev_chain,nb);
-}
-
-/*
- * Support routine. Sends outgoing frames to any network
- * taps currently in use.
- */
-
-void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
-{
- struct packet_type *ptype;
- get_fast_time(&skb->stamp);
-
- for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
- {
- /* Never send packets back to the socket
- * they originated from - MvS (miquels@drinkel.ow.org)
- */
- if ((ptype->dev == dev || !ptype->dev) &&
- ((struct sock *)ptype->data != skb->sk))
- {
- struct sk_buff *skb2;
- if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
- break;
-
- /* The code following below is wrong.
-
- The only reason why it works is that ONLY packet
- sockets receive outgoing packets.  If such a packet
- were (occasionally) received by a normal packet
- handler, which expects that the mac header is pulled...
- */
-
- /* More sensible variant. skb->nh should be correctly
- set by sender, so that the second statement is
- just protection against buggy protocols.
- */
- skb2->mac.raw = skb2->data;
-
- if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) {
- if (net_ratelimit())
- printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
- skb2->nh.raw = skb2->data;
- if (dev->hard_header)
- skb2->nh.raw += dev->hard_header_len;
- }
-
- skb2->h.raw = skb2->nh.raw;
- skb2->pkt_type = PACKET_OUTGOING;
- ptype->func(skb2, skb->dev, ptype);
- }
- }
-}
-
-/*
- * Fast path for loopback frames.
- */
-
-void dev_loopback_xmit(struct sk_buff *skb)
-{
- struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC);
- if (newskb==NULL)
- return;
-
- newskb->mac.raw = newskb->data;
- skb_pull(newskb, newskb->nh.raw - newskb->data);
- newskb->pkt_type = PACKET_LOOPBACK;
- newskb->ip_summed = CHECKSUM_UNNECESSARY;
- if (newskb->dst==NULL)
- printk(KERN_DEBUG "BUG: packet without dst looped back 1\n");
- netif_rx(newskb);
-}
-
-int dev_queue_xmit(struct sk_buff *skb)
-{
- struct device *dev = skb->dev;
- struct Qdisc *q;
- char *str1 = "pfinet: dev_queue_xmit check point 1.\n";
- char *str2 = "pfinet: dev_queue_xmit check point 2.\n";
- char *str3 = "pfinet: dev_queue_xmit check point 3.\n";
- char *str4 = "pfinet: dev_queue_xmit check point 4.\n";
- char *str5 = "pfinet: dev_queue_xmit check point 5.\n";
- char *str6 = "pfinet: dev_queue_xmit check point 6.\n";
- int stderr_fd = fileno (stderr);
-
- write (stderr_fd, str1, strlen (str1) + 1);
- fflush (stderr);
-
-#ifdef CONFIG_NET_PROFILE
- start_bh_atomic();
- NET_PROFILE_ENTER(dev_queue_xmit);
-#endif
-
- start_bh_atomic();
- q = dev->qdisc;
- write (stderr_fd, str2, strlen (str2) + 1);
- fflush (stderr);
- if (q->enqueue) {
- q->enqueue(skb, q);
- qdisc_wakeup(dev);
- end_bh_atomic();
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
-
- return 0;
- }
- write (stderr_fd, str3, strlen (str3) + 1);
- fflush (stderr);
-
- /* The device has no queue. Common case for software devices:
- loopback, all the sorts of tunnels...
-
- Really, it is unlikely that bh protection is necessary here:
- virtual devices do not generate EOI events.
- However, it is possible, that they rely on bh protection
- made by us here.
- */
- if (dev->flags&IFF_UP) {
- write (stderr_fd, str4, strlen (str4) + 1);
- fflush (stderr);
- if (netdev_nit)
- dev_queue_xmit_nit(skb,dev);
- write (stderr_fd, str5, strlen (str5) + 1);
- fflush (stderr);
- if (dev->hard_start_xmit(skb, dev) == 0) {
- end_bh_atomic();
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
- write (stderr_fd, str6, strlen (str6) + 1);
- fflush (stderr);
-
- return 0;
- }
- if (net_ratelimit())
- printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
- }
- end_bh_atomic();
- write (stderr_fd, str6, strlen (str6) + 1);
- fflush (stderr);
-
- kfree_skb(skb);
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
-
- return 0;
-}
-
-
-/*=======================================================================
- Receiver routines
- =======================================================================*/
-
-int netdev_dropping = 0;
-int netdev_max_backlog = 300;
-atomic_t netdev_rx_dropped;
-#ifdef CONFIG_CPU_IS_SLOW
-int net_cpu_congestion;
-#endif
-
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-int netdev_throttle_events;
-static unsigned long netdev_fc_mask = 1;
-unsigned long netdev_fc_xoff = 0;
-
-static struct
-{
- void (*stimul)(struct device *);
- struct device *dev;
-} netdev_fc_slots[32];
-
-int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
-{
- int bit = 0;
- unsigned long flags;
-
- save_flags(flags);
- cli();
- if (netdev_fc_mask != ~0UL) {
- bit = ffz(netdev_fc_mask);
- netdev_fc_slots[bit].stimul = stimul;
- netdev_fc_slots[bit].dev = dev;
- set_bit(bit, &netdev_fc_mask);
- clear_bit(bit, &netdev_fc_xoff);
- }
- restore_flags(flags);
- return bit;
-}
-
-void netdev_unregister_fc(int bit)
-{
- unsigned long flags;
-
- save_flags(flags);
- cli();
- if (bit > 0) {
- netdev_fc_slots[bit].stimul = NULL;
- netdev_fc_slots[bit].dev = NULL;
- clear_bit(bit, &netdev_fc_mask);
- clear_bit(bit, &netdev_fc_xoff);
- }
- restore_flags(flags);
-}
-
-static void netdev_wakeup(void)
-{
- unsigned long xoff;
-
- cli();
- xoff = netdev_fc_xoff;
- netdev_fc_xoff = 0;
- netdev_dropping = 0;
- netdev_throttle_events++;
- while (xoff) {
- int i = ffz(~xoff);
- xoff &= ~(1<<i);
- netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
- }
- sti();
-}
-#endif
-
-static void dev_clear_backlog(struct device *dev)
-{
- struct sk_buff *curr;
- unsigned long flags;
-
- /*
- *
- * Let us now clear the backlog queue. -AS
- *
- * We are competing here both with netif_rx() and net_bh().
- * We don't want either of those to mess with skb ptrs
- * while we work on them, thus we must grab the
- * skb_queue_lock.
- */
-
- if (backlog.qlen) {
- repeat:
- spin_lock_irqsave(&skb_queue_lock, flags);
- for (curr = backlog.next;
- curr != (struct sk_buff *)(&backlog);
- curr = curr->next)
- if (curr->dev == dev)
- {
- __skb_unlink(curr, &backlog);
- spin_unlock_irqrestore(&skb_queue_lock, flags);
- kfree_skb(curr);
- goto repeat;
- }
- spin_unlock_irqrestore(&skb_queue_lock, flags);
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (netdev_dropping)
- netdev_wakeup();
-#else
- netdev_dropping = 0;
-#endif
- }
-}
-
-/*
- * Receive a packet from a device driver and queue it for the upper
- * (protocol) levels. It always succeeds.
- */
-
-void netif_rx(struct sk_buff *skb)
-{
-#ifndef CONFIG_CPU_IS_SLOW
- if(skb->stamp.tv_sec==0)
- get_fast_time(&skb->stamp);
-#else
- skb->stamp = xtime;
-#endif
-
- /* The code is rearranged so that the path is shortest when the
-    CPU is congested but still operating.
-  */
-
- if (backlog.qlen <= netdev_max_backlog) {
- if (backlog.qlen) {
- if (netdev_dropping == 0) {
- skb_queue_tail(&backlog,skb);
- mark_bh(NET_BH);
- return;
- }
- atomic_inc(&netdev_rx_dropped);
- kfree_skb(skb);
- return;
- }
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (netdev_dropping)
- netdev_wakeup();
-#else
- netdev_dropping = 0;
-#endif
- skb_queue_tail(&backlog,skb);
- mark_bh(NET_BH);
- return;
- }
- netdev_dropping = 1;
- atomic_inc(&netdev_rx_dropped);
- kfree_skb(skb);
-}
-
-#ifdef CONFIG_BRIDGE
-static inline void handle_bridge(struct sk_buff *skb, unsigned short type)
-{
- /*
- * The br_stats.flags is checked here to save the expense of a
- * function call.
- */
- if ((br_stats.flags & BR_UP) && br_call_bridge(skb, type))
- {
- /*
- * We pass the bridge a complete frame. This means
- * recovering the MAC header first.
- */
-
- int offset;
-
- skb=skb_clone(skb, GFP_ATOMIC);
- if(skb==NULL)
- return;
-
- offset=skb->data-skb->mac.raw;
- skb_push(skb,offset); /* Put header back on for bridge */
-
- if(br_receive_frame(skb))
- return;
- kfree_skb(skb);
- }
- return;
-}
-#endif
-
-/*
- * When we are called the queue is ready to grab, the interrupts are
- * on and hardware can interrupt and queue to the receive queue as we
- * run with no problems.
- * This is run as a bottom half after an interrupt handler that does
- * mark_bh(NET_BH);
- */
-
-void net_bh(void)
-{
- struct packet_type *ptype;
- struct packet_type *pt_prev;
- unsigned short type;
-#ifndef _HURD_
- unsigned long start_time = jiffies;
-#ifdef CONFIG_CPU_IS_SLOW
- static unsigned long start_busy = 0;
- static unsigned long ave_busy = 0;
-
- if (start_busy == 0)
- start_busy = start_time;
- net_cpu_congestion = ave_busy>>8;
-#endif
-#endif
-
- NET_PROFILE_ENTER(net_bh);
- /*
- * Can we send anything now? We want to clear the
- * decks for any more sends that get done as we
- * process the input. This also minimises the
- * latency on a transmit interrupt bh.
- */
-
- if (qdisc_head.forw != &qdisc_head)
- qdisc_run_queues();
-
- /*
- * Any data left to process. This may occur because a
- * mark_bh() is done after we empty the queue including
- * that from the device which does a mark_bh() just after
- */
-
- /*
- * While the queue is not empty..
- *
- * Note that the queue never shrinks due to
- * an interrupt, so we can do this test without
- * disabling interrupts.
- */
-
- while (!skb_queue_empty(&backlog))
- {
- struct sk_buff * skb;
-
-#ifndef _HURD_
- /* Give chance to other bottom halves to run */
- if (jiffies - start_time > 1)
- goto net_bh_break;
-#endif
-
- /*
- * We have a packet. Therefore the queue has shrunk
- */
- skb = skb_dequeue(&backlog);
-
-#ifndef _HURD_
-#ifdef CONFIG_CPU_IS_SLOW
- if (ave_busy > 128*16) {
- kfree_skb(skb);
- while ((skb = skb_dequeue(&backlog)) != NULL)
- kfree_skb(skb);
- break;
- }
-#endif
-#endif
-
-
-#if 0
- NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
-#endif
-#ifdef CONFIG_NET_FASTROUTE
- if (skb->pkt_type == PACKET_FASTROUTE) {
- dev_queue_xmit(skb);
- continue;
- }
-#endif
-
- /*
- * Bump the pointer to the next structure.
- *
- * On entry to the protocol layer. skb->data and
- * skb->nh.raw point to the MAC and encapsulated data
- */
-
- /* XXX until we figure out every place to modify.. */
- skb->h.raw = skb->nh.raw = skb->data;
-
- if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) {
- printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol);
- kfree_skb(skb);
- continue;
- }
-
- /*
- * Fetch the packet protocol ID.
- */
-
- type = skb->protocol;
-
-#ifdef CONFIG_BRIDGE
- /*
- * If we are bridging then pass the frame up to the
- * bridging code (if this protocol is to be bridged).
- * If it is bridged then move on
- */
- handle_bridge(skb, type);
-#endif
-
- /*
- * We got a packet ID. Now loop over the "known protocols"
- * list. There are two lists. The ptype_all list of taps (normally empty)
- * and the main protocol list which is hashed perfectly for normal protocols.
- */
-
- pt_prev = NULL;
- for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next)
- {
- if (!ptype->dev || ptype->dev == skb->dev) {
- if(pt_prev)
- {
- struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
- if(skb2)
- pt_prev->func(skb2,skb->dev, pt_prev);
- }
- pt_prev=ptype;
- }
- }
-
- for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next)
- {
- if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev))
- {
- /*
- * We already have a match queued. Deliver
- * to it and then remember the new match
- */
- if(pt_prev)
- {
- struct sk_buff *skb2;
-
- skb2=skb_clone(skb, GFP_ATOMIC);
-
- /*
- * Kick the protocol handler. This should be fast
- * and efficient code.
- */
-
- if(skb2)
- pt_prev->func(skb2, skb->dev, pt_prev);
- }
- /* Remember the current last to do */
- pt_prev=ptype;
- }
- } /* End of protocol list loop */
-
- /*
- * Is there a last item to send to ?
- */
-
- if(pt_prev) {
- pt_prev->func(skb, skb->dev, pt_prev);
- }
- /*
- * Has an unknown packet has been received ?
- */
-
- else {
- kfree_skb(skb);
- }
- } /* End of queue loop */
-
- /*
- * We have emptied the queue
- */
-
- /*
- * One last output flush.
- */
-
- if (qdisc_head.forw != &qdisc_head)
- qdisc_run_queues();
-
-#ifndef _HURD_
-#ifdef CONFIG_CPU_IS_SLOW
- if (1) {
- unsigned long start_idle = jiffies;
- ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
- start_busy = 0;
- }
-#endif
-#endif
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (netdev_dropping)
- netdev_wakeup();
-#else
- netdev_dropping = 0;
-#endif
- NET_PROFILE_LEAVE(net_bh);
- return;
-
-#ifndef _HURD_
-net_bh_break:
- mark_bh(NET_BH);
- NET_PROFILE_LEAVE(net_bh);
- return;
-#endif
-}
-
-/* Protocol dependent address dumping routines */
-
-static gifconf_func_t * gifconf_list [NPROTO];
-
-int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
-{
- if (family>=NPROTO)
- return -EINVAL;
- gifconf_list[family] = gifconf;
- return 0;
-}
-
-
-/*
- * Map an interface index to its name (SIOCGIFNAME)
- */
-
-/*
- * This call is useful, but I'd remove it too.
- *
- * The reason is purely aesthetic; it is the only call
- * from SIOC* family using struct ifreq in reversed manner.
- * Besides that, it is pretty silly to put "drawing" facility
- * to kernel, it is useful only to print ifindices
- * in readable form, is not it? --ANK
- *
- * We need this ioctl for efficient implementation of the
- * if_indextoname() function required by the IPv6 API. Without
- * it, we would have to search all the interfaces to find a
- * match. --pb
- */
-
-static int dev_ifname(struct ifreq *arg)
-{
- struct device *dev;
- struct ifreq ifr;
- int err;
-
- /*
- * Fetch the caller's info block.
- */
-
- err = copy_from_user(&ifr, arg, sizeof(struct ifreq));
- if (err)
- return -EFAULT;
-
- dev = dev_get_by_index(ifr.ifr_ifindex);
- if (!dev)
- return -ENODEV;
-
- strcpy(ifr.ifr_name, dev->name);
-
- err = copy_to_user(arg, &ifr, sizeof(struct ifreq));
- return (err)?-EFAULT:0;
-}
-
-/*
- * Perform a SIOCGIFCONF call. This structure will change
- * size eventually, and there is nothing I can do about it.
- * Thus we will need a 'compatibility mode'.
- */
-
-#ifdef _HURD_
-int dev_ifconf(char *arg)
-#else
-static int dev_ifconf(char *arg)
-#endif
-{
- struct ifconf ifc;
- struct device *dev;
- char *pos;
- int len;
- int total;
- int i;
-
- /*
- * Fetch the caller's info block.
- */
-
- if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
- return -EFAULT;
-
- pos = ifc.ifc_buf;
- len = ifc.ifc_len;
-
- /*
- * Loop over the interfaces, and write an info block for each.
- */
-
- total = 0;
- for (dev = dev_base; dev != NULL; dev = dev->next) {
- for (i=0; i<NPROTO; i++) {
- if (gifconf_list[i]) {
- int done;
- if (pos==NULL) {
- done = gifconf_list[i](dev, NULL, 0);
- } else {
- done = gifconf_list[i](dev, pos+total, len-total);
- }
- if (done<0)
- return -EFAULT;
- total += done;
- }
- }
- }
-
- /*
- * All done. Write the updated control block back to the caller.
- */
- ifc.ifc_len = total;
-
- if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- /*
- * Both BSD and Solaris return 0 here, so we do too.
- */
- return 0;
-}
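Callers typically use the two-pass pattern this enables (a userland sketch;
the NULL-buffer probe path above computes only the length needed):

    struct ifconf ifc = { 0 };
    ioctl (fd, SIOCGIFCONF, &ifc);   /* ifc_buf == NULL: size probe */
    ifc.ifc_buf = malloc (ifc.ifc_len);
    ioctl (fd, SIOCGIFCONF, &ifc);   /* second pass fills the buffer */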
-
-/*
- * This is invoked by the /proc filesystem handler to display a device
- * in detail.
- */
-
-#ifdef CONFIG_PROC_FS
-static int sprintf_stats(char *buffer, struct device *dev)
-{
- struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
- int size;
-
- if (stats)
- size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
- dev->name,
- stats->rx_bytes,
- stats->rx_packets, stats->rx_errors,
- stats->rx_dropped + stats->rx_missed_errors,
- stats->rx_fifo_errors,
- stats->rx_length_errors + stats->rx_over_errors
- + stats->rx_crc_errors + stats->rx_frame_errors,
- stats->rx_compressed, stats->multicast,
- stats->tx_bytes,
- stats->tx_packets, stats->tx_errors, stats->tx_dropped,
- stats->tx_fifo_errors, stats->collisions,
- stats->tx_carrier_errors + stats->tx_aborted_errors
- + stats->tx_window_errors + stats->tx_heartbeat_errors,
- stats->tx_compressed);
- else
- size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
-
- return size;
-}
-
-/*
- * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
- * to create /proc/net/dev
- */
-
-int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- int len=0;
- off_t begin=0;
- off_t pos=0;
- int size;
-
- struct device *dev;
-
-
- size = sprintf(buffer,
- "Inter-| Receive | Transmit\n"
- " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
-
- pos+=size;
- len+=size;
-
-
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
- size = sprintf_stats(buffer+len, dev);
- len+=size;
- pos=begin+len;
-
- if(pos<offset)
- {
- len=0;
- begin=pos;
- }
- if(pos>offset+length)
- break;
- }
-
- *start=buffer+(offset-begin); /* Start of wanted data */
- len-=(offset-begin); /* Start slop */
- if(len>length)
- len=length; /* Ending slop */
- return len;
-}
-
-static int dev_proc_stats(char *buffer, char **start, off_t offset,
- int length, int *eof, void *data)
-{
- int len;
-
- len = sprintf(buffer, "%08x %08x %08x %08x %08x\n",
- atomic_read(&netdev_rx_dropped),
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- netdev_throttle_events,
-#else
- 0,
-#endif
-#ifdef CONFIG_NET_FASTROUTE
- dev_fastroute_stat.hits,
- dev_fastroute_stat.succeed,
- dev_fastroute_stat.deferred
-#else
- 0, 0, 0
-#endif
- );
-
- len -= offset;
-
- if (len > length)
- len = length;
- if(len < 0)
- len = 0;
-
- *start = buffer + offset;
- *eof = 1;
-
- return len;
-}
-
-#endif /* CONFIG_PROC_FS */
-
-
-#ifdef CONFIG_NET_RADIO
-#ifdef CONFIG_PROC_FS
-
-/*
- * Print one entry of /proc/net/wireless
- * This is a clone of /proc/net/dev (just above)
- */
-static int sprintf_wireless_stats(char *buffer, struct device *dev)
-{
- /* Get stats from the driver */
- struct iw_statistics *stats = (dev->get_wireless_stats ?
- dev->get_wireless_stats(dev) :
- (struct iw_statistics *) NULL);
- int size;
-
- if(stats != (struct iw_statistics *) NULL)
- {
- size = sprintf(buffer,
- "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d\n",
- dev->name,
- stats->status,
- stats->qual.qual,
- stats->qual.updated & 1 ? '.' : ' ',
- stats->qual.level,
- stats->qual.updated & 2 ? '.' : ' ',
- stats->qual.noise,
- stats->qual.updated & 4 ? '.' : ' ',
- stats->discard.nwid,
- stats->discard.code,
- stats->discard.misc);
- stats->qual.updated = 0;
- }
- else
- size = 0;
-
- return size;
-}
-
-/*
- * Print info for /proc/net/wireless (print all entries)
- * This is a clone of /proc/net/dev (just above)
- */
-int dev_get_wireless_info(char * buffer, char **start, off_t offset,
- int length, int dummy)
-{
- int len = 0;
- off_t begin = 0;
- off_t pos = 0;
- int size;
-
- struct device * dev;
-
- size = sprintf(buffer,
- "Inter-| sta-| Quality | Discarded packets\n"
- " face | tus | link level noise | nwid crypt misc\n"
- );
-
- pos+=size;
- len+=size;
-
- for(dev = dev_base; dev != NULL; dev = dev->next)
- {
- size = sprintf_wireless_stats(buffer+len, dev);
- len+=size;
- pos=begin+len;
-
- if(pos < offset)
- {
- len=0;
- begin=pos;
- }
- if(pos > offset + length)
- break;
- }
-
- *start = buffer + (offset - begin); /* Start of wanted data */
- len -= (offset - begin); /* Start slop */
- if(len > length)
- len = length; /* Ending slop */
-
- return len;
-}
-#endif /* CONFIG_PROC_FS */
-#endif /* CONFIG_NET_RADIO */
-
-void dev_set_promiscuity(struct device *dev, int inc)
-{
- unsigned short old_flags = dev->flags;
-
- dev->flags |= IFF_PROMISC;
- if ((dev->promiscuity += inc) == 0)
- dev->flags &= ~IFF_PROMISC;
- if (dev->flags^old_flags) {
-#ifdef CONFIG_NET_FASTROUTE
- if (dev->flags&IFF_PROMISC) {
- netdev_fastroute_obstacles++;
- dev_clear_fastroute(dev);
- } else
- netdev_fastroute_obstacles--;
-#endif
- dev_mc_upload(dev);
- printk(KERN_INFO "device %s %s promiscuous mode\n",
- dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
- }
-}
-
-void dev_set_allmulti(struct device *dev, int inc)
-{
- unsigned short old_flags = dev->flags;
-
- dev->flags |= IFF_ALLMULTI;
- if ((dev->allmulti += inc) == 0)
- dev->flags &= ~IFF_ALLMULTI;
- if (dev->flags^old_flags)
- dev_mc_upload(dev);
-}
-
-int dev_change_flags(struct device *dev, unsigned flags)
-{
- int ret;
- int old_flags = dev->flags;
-
- /*
- * Set the flags on our device.
- */
-
- dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP|
- IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC|
- IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
- (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
-
- /*
- * Load in the correct multicast list now the flags have changed.
- */
-
- dev_mc_upload(dev);
-
- /*
- * Have we downed the interface. We handle IFF_UP ourselves
- * according to user attempts to set it, rather than blindly
- * setting it.
- */
-
- ret = 0;
- if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
- {
- ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
-
- if (ret == 0)
- dev_mc_upload(dev);
- }
-
- if (dev->flags&IFF_UP &&
- ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
-
- if ((flags^dev->gflags)&IFF_PROMISC) {
- int inc = (flags&IFF_PROMISC) ? +1 : -1;
- dev->gflags ^= IFF_PROMISC;
- dev_set_promiscuity(dev, inc);
- }
-
- /* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
-    is important.  Some (broken) drivers set IFF_PROMISC when
-    IFF_ALLMULTI is requested, without asking us and without reporting.
- */
- if ((flags^dev->gflags)&IFF_ALLMULTI) {
- int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
- dev->gflags ^= IFF_ALLMULTI;
- dev_set_allmulti(dev, inc);
- }
-
- return ret;
-}
-
-#ifdef _HURD_
-
-#define dev_ioctl 0
-
-#else
-
-/*
- * Perform the SIOCxIFxxx calls.
- */
-
-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
-{
- struct device *dev;
- int err;
-
- if ((dev = dev_get(ifr->ifr_name)) == NULL)
- return -ENODEV;
-
- switch(cmd)
- {
- case SIOCGIFFLAGS: /* Get interface flags */
- ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI))
- |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
- return 0;
-
- case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
-
- case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
- ifr->ifr_metric = 0;
- return 0;
-
- case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
- return -EOPNOTSUPP;
-
- case SIOCGIFMTU: /* Get the MTU of a device */
- ifr->ifr_mtu = dev->mtu;
- return 0;
-
- case SIOCSIFMTU: /* Set the MTU of a device */
- if (ifr->ifr_mtu == dev->mtu)
- return 0;
-
- /*
- * MTU must be positive.
- */
-
- if (ifr->ifr_mtu<=0)
- return -EINVAL;
-
- if (dev->change_mtu)
- err = dev->change_mtu(dev, ifr->ifr_mtu);
- else {
- dev->mtu = ifr->ifr_mtu;
- err = 0;
- }
- if (!err && dev->flags&IFF_UP)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
- return err;
-
- case SIOCGIFHWADDR:
- memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
- ifr->ifr_hwaddr.sa_family=dev->type;
- return 0;
-
- case SIOCSIFHWADDR:
- if(dev->set_mac_address==NULL)
- return -EOPNOTSUPP;
- if(ifr->ifr_hwaddr.sa_family!=dev->type)
- return -EINVAL;
- err=dev->set_mac_address(dev,&ifr->ifr_hwaddr);
- if (!err)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
- return err;
-
- case SIOCSIFHWBROADCAST:
- if(ifr->ifr_hwaddr.sa_family!=dev->type)
- return -EINVAL;
- memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
- return 0;
-
- case SIOCGIFMAP:
- ifr->ifr_map.mem_start=dev->mem_start;
- ifr->ifr_map.mem_end=dev->mem_end;
- ifr->ifr_map.base_addr=dev->base_addr;
- ifr->ifr_map.irq=dev->irq;
- ifr->ifr_map.dma=dev->dma;
- ifr->ifr_map.port=dev->if_port;
- return 0;
-
- case SIOCSIFMAP:
- if (dev->set_config)
- return dev->set_config(dev,&ifr->ifr_map);
- return -EOPNOTSUPP;
-
- case SIOCADDMULTI:
- if(dev->set_multicast_list==NULL ||
- ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
- return -EINVAL;
- dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
- return 0;
-
- case SIOCDELMULTI:
- if(dev->set_multicast_list==NULL ||
- ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
- return -EINVAL;
- dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
- return 0;
-
- case SIOCGIFINDEX:
- ifr->ifr_ifindex = dev->ifindex;
- return 0;
-
- case SIOCGIFTXQLEN:
- ifr->ifr_qlen = dev->tx_queue_len;
- return 0;
-
- case SIOCSIFTXQLEN:
- if(ifr->ifr_qlen<0)
- return -EINVAL;
- dev->tx_queue_len = ifr->ifr_qlen;
- return 0;
-
- case SIOCSIFNAME:
- if (dev->flags&IFF_UP)
- return -EBUSY;
- if (dev_get(ifr->ifr_newname))
- return -EEXIST;
- memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
- dev->name[IFNAMSIZ-1] = 0;
- notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
- return 0;
-
- /*
- * Unknown or private ioctl
- */
-
- default:
- if(cmd >= SIOCDEVPRIVATE &&
- cmd <= SIOCDEVPRIVATE + 15) {
- if (dev->do_ioctl)
- return dev->do_ioctl(dev, ifr, cmd);
- return -EOPNOTSUPP;
- }
-
-#ifdef CONFIG_NET_RADIO
- if(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- if (dev->do_ioctl)
- return dev->do_ioctl(dev, ifr, cmd);
- return -EOPNOTSUPP;
- }
-#endif /* CONFIG_NET_RADIO */
-
- }
- return -EINVAL;
-}
-
-
-/*
- * This function handles all "interface"-type I/O control requests. The actual
- * 'doing' part of this is dev_ifsioc above.
- */
-
-int dev_ioctl(unsigned int cmd, void *arg)
-{
- struct ifreq ifr;
- int ret;
- char *colon;
-
-	/* One special case: SIOCGIFCONF takes an ifconf argument
-	   and requires a shared lock, because it sleeps while writing
-	   to user space.
- */
-
- if (cmd == SIOCGIFCONF) {
- rtnl_shlock();
- ret = dev_ifconf((char *) arg);
- rtnl_shunlock();
- return ret;
- }
- if (cmd == SIOCGIFNAME) {
- return dev_ifname((struct ifreq *)arg);
- }
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
-
- ifr.ifr_name[IFNAMSIZ-1] = 0;
-
- colon = strchr(ifr.ifr_name, ':');
- if (colon)
- *colon = 0;
-
- /*
- * See which interface the caller is talking about.
- */
-
- switch(cmd)
- {
- /*
- * These ioctl calls:
- * - can be done by all.
-		 *	- are atomic and do not require locking.
- * - return a value
- */
-
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFHWADDR:
- case SIOCGIFSLAVE:
- case SIOCGIFMAP:
- case SIOCGIFINDEX:
- case SIOCGIFTXQLEN:
- dev_load(ifr.ifr_name);
- ret = dev_ifsioc(&ifr, cmd);
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- }
- return ret;
-
- /*
- * These ioctl calls:
- * - require superuser power.
- * - require strict serialization.
- * - do not return a value
- */
-
- case SIOCSIFFLAGS:
- case SIOCSIFMETRIC:
- case SIOCSIFMTU:
- case SIOCSIFMAP:
- case SIOCSIFHWADDR:
- case SIOCSIFSLAVE:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- case SIOCSIFHWBROADCAST:
- case SIOCSIFTXQLEN:
- case SIOCSIFNAME:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- dev_load(ifr.ifr_name);
- rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
- rtnl_unlock();
- return ret;
-
- case SIOCGIFMEM:
- /* Get the per device memory space. We can add this but currently
- do not support it */
- case SIOCSIFMEM:
- /* Set the per device memory buffer space. Not applicable in our case */
- case SIOCSIFLINK:
- return -EINVAL;
-
- /*
- * Unknown or private ioctl.
- */
-
- default:
- if (cmd >= SIOCDEVPRIVATE &&
- cmd <= SIOCDEVPRIVATE + 15) {
- dev_load(ifr.ifr_name);
- rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
- rtnl_unlock();
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return ret;
- }
-#ifdef CONFIG_NET_RADIO
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- dev_load(ifr.ifr_name);
- if (IW_IS_SET(cmd)) {
- if (!suser())
- return -EPERM;
- rtnl_lock();
- }
- ret = dev_ifsioc(&ifr, cmd);
- if (IW_IS_SET(cmd))
- rtnl_unlock();
- if (!ret && IW_IS_GET(cmd) &&
- copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return ret;
- }
-#endif /* CONFIG_NET_RADIO */
- return -EINVAL;
- }
-}
-
-#endif
-
-int dev_new_index(void)
-{
- static int ifindex;
- for (;;) {
- if (++ifindex <= 0)
- ifindex=1;
- if (dev_get_by_index(ifindex) == NULL)
- return ifindex;
- }
-}
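-
-/* Note that ifindex only ever grows, so indices are not reused while
-   the stack runs; should the int wrap, the ++ifindex <= 0 test restarts
-   the search at 1 and dev_get_by_index skips any index still in use. */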
-
-static int dev_boot_phase = 1;
-
-
-int register_netdevice(struct device *dev)
-{
- struct device *d, **dp;
-
- if (dev_boot_phase) {
-		/* This is NOT a bug, but I am not sure that all the
-		   devices initialized before the netdev module is started
-		   are sane.
-
-		   Now they are chained to the device boot list
-		   and probed later. If a module is initialized
-		   before netdev but assumes that dev->init
-		   is really called by register_netdev(), it will fail.
-
-		   So this message should be printed for a while.
- */
- printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
-
- /* Check for existence, and append to tail of chain */
- for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev || strcmp(d->name, dev->name) == 0)
- return -EEXIST;
- }
- dev->next = NULL;
- *dp = dev;
- return 0;
- }
-
- dev->iflink = -1;
-
- /* Init, if this function is available */
- if (dev->init && dev->init(dev) != 0)
- return -EIO;
-
- /* Check for existence, and append to tail of chain */
- for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev || strcmp(d->name, dev->name) == 0)
- return -EEXIST;
- }
- dev->next = NULL;
- dev_init_scheduler(dev);
- dev->ifindex = dev_new_index();
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
- *dp = dev;
-
- /* Notify protocols, that a new device appeared. */
- notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
-
- return 0;
-}
-
-int unregister_netdevice(struct device *dev)
-{
- struct device *d, **dp;
-
- if (dev_boot_phase == 0) {
-		/* If the device is running, close it.
-		   This is a very bad idea; really we should
-		   complain loudly here, but random hackery
-		   in linux/drivers/net relies on it.
- */
- if (dev->flags & IFF_UP)
- dev_close(dev);
-
-#ifdef CONFIG_NET_FASTROUTE
- dev_clear_fastroute(dev);
-#endif
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
- /* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
- */
- notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
-
- /*
- * Flush the multicast chain
- */
- dev_mc_discard(dev);
-
-		/* To avoid leaving dangling pointers,
-		   we wait for the end of the critical section */
- dev_lock_wait();
- }
-
- /* And unlink it from device chain. */
- for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev) {
- *dp = d->next;
- synchronize_bh();
- d->next = NULL;
-
- if (dev->destructor)
- dev->destructor(dev);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-
-/*
- * Initialize the DEV module. At boot time this walks the device list and
- * unhooks any devices that fail to initialise (normally hardware not
- * present) and leaves us with a valid list of present and active devices.
- *
- */
-extern int lance_init(void);
-extern int bpq_init(void);
-extern int scc_init(void);
-extern void sdla_setup(void);
-extern void sdla_c_setup(void);
-extern void dlci_setup(void);
-extern int dmascc_init(void);
-extern int sm_init(void);
-
-extern int baycom_ser_fdx_init(void);
-extern int baycom_ser_hdx_init(void);
-extern int baycom_par_init(void);
-
-extern int lapbeth_init(void);
-extern int comx_init(void);
-extern void arcnet_init(void);
-extern void ip_auto_config(void);
-#ifdef CONFIG_8xx
-extern int cpm_enet_init(void);
-#endif /* CONFIG_8xx */
-
-#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry proc_net_dev = {
- PROC_NET_DEV, 3, "dev",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- dev_get_info
-};
-#endif
-
-#ifdef CONFIG_NET_RADIO
-#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry proc_net_wireless = {
- PROC_NET_WIRELESS, 8, "wireless",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- dev_get_wireless_info
-};
-#endif /* CONFIG_PROC_FS */
-#endif /* CONFIG_NET_RADIO */
-
-__initfunc(int net_dev_init(void))
-{
- struct device *dev, **dp;
-
-#ifdef CONFIG_NET_SCHED
- pktsched_init();
-#endif
-
- /*
- * Initialise the packet receive queue.
- */
-
- skb_queue_head_init(&backlog);
-
- /*
- * The bridge has to be up before the devices
- */
-
-#ifdef CONFIG_BRIDGE
- br_init();
-#endif
-
- /*
- * This is Very Ugly(tm).
- *
-	 *	Some devices want to be initialized early...
- */
-
-#if defined(CONFIG_SCC)
- scc_init();
-#endif
-#if defined(CONFIG_DMASCC)
- dmascc_init();
-#endif
-#if defined(CONFIG_BPQETHER)
- bpq_init();
-#endif
-#if defined(CONFIG_DLCI)
- dlci_setup();
-#endif
-#if defined(CONFIG_SDLA)
- sdla_c_setup();
-#endif
-#if defined(CONFIG_BAYCOM_PAR)
- baycom_par_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_FDX)
- baycom_ser_fdx_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_HDX)
- baycom_ser_hdx_init();
-#endif
-#if defined(CONFIG_SOUNDMODEM)
- sm_init();
-#endif
-#if defined(CONFIG_LAPBETHER)
- lapbeth_init();
-#endif
-#if defined(CONFIG_PLIP)
- plip_init();
-#endif
-#if defined(CONFIG_ARCNET)
- arcnet_init();
-#endif
-#if defined(CONFIG_8xx)
- cpm_enet_init();
-#endif
-#if defined(CONFIG_COMX)
- comx_init();
-#endif
- /*
-	 *	SLHC, if present, needs attaching so other people see it
-	 *	even if it has not been opened.
- */
-
-#ifdef CONFIG_INET
-#if (defined(CONFIG_SLIP) && defined(CONFIG_SLIP_COMPRESSED)) \
- || defined(CONFIG_PPP) \
- || (defined(CONFIG_ISDN) && defined(CONFIG_ISDN_PPP))
- slhc_install();
-#endif
-#endif
-
-#ifdef CONFIG_NET_PROFILE
- net_profile_init();
- NET_PROFILE_REGISTER(dev_queue_xmit);
- NET_PROFILE_REGISTER(net_bh);
-#if 0
- NET_PROFILE_REGISTER(net_bh_skb);
-#endif
-#endif
- /*
- * Add the devices.
- * If the call to dev->init fails, the dev is removed
-	 *	from the chain, disconnecting the device until the
- * next reboot.
- */
-
- dp = &dev_base;
- while ((dev = *dp) != NULL)
- {
- dev->iflink = -1;
- if (dev->init && dev->init(dev))
- {
- /*
- * It failed to come up. Unhook it.
- */
- *dp = dev->next;
- synchronize_bh();
- }
- else
- {
- dp = &dev->next;
- dev->ifindex = dev_new_index();
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
- dev_init_scheduler(dev);
- }
- }
-
-#ifdef CONFIG_PROC_FS
- proc_net_register(&proc_net_dev);
- {
- struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0);
- ent->read_proc = dev_proc_stats;
- }
-#endif
-
-#ifdef CONFIG_NET_RADIO
-#ifdef CONFIG_PROC_FS
- proc_net_register(&proc_net_wireless);
-#endif /* CONFIG_PROC_FS */
-#endif /* CONFIG_NET_RADIO */
-
- init_bh(NET_BH, net_bh);
-
- dev_boot_phase = 0;
-
- dev_mcast_init();
-
-#ifdef CONFIG_BRIDGE
- /*
- * Register any statically linked ethernet devices with the bridge
- */
- br_spacedevice_register();
-#endif
-
-#ifdef CONFIG_IP_PNP
- ip_auto_config();
-#endif
-
- return 0;
-}
diff --git a/pfinet.old/linux-src/net/ipv4/ip_output.c~ b/pfinet.old/linux-src/net/ipv4/ip_output.c~
deleted file mode 100644
index 89272d6b..00000000
--- a/pfinet.old/linux-src/net/ipv4/ip_output.c~
+++ /dev/null
@@ -1,1000 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * The Internet Protocol (IP) output module.
- *
- * Version: $Id: ip_output.c,v 1.67.2.1 1999/09/07 02:25:23 davem Exp $
- *
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Donald Becker, <becker@super.org>
- * Alan Cox, <Alan.Cox@linux.org>
- * Richard Underwood
- * Stefan Becker, <stefanb@yello.ping.de>
- * Jorge Cwik, <jorge@laser.satlink.net>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *
- * See ip_input.c for original log
- *
- * Fixes:
- * Alan Cox : Missing nonblock feature in ip_build_xmit.
- * Mike Kilburn : htons() missing in ip_build_xmit.
- * Bradford Johnson: Fix faulty handling of some frames when
- * no route is found.
- * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit
- * (in case if packet not accepted by
- * output firewall rules)
- * Mike McLagan : Routing by source
- * Alexey Kuznetsov: use new route cache
- * Andi Kleen: Fix broken PMTU recovery and remove
- * some redundant tests.
- * Vitaly E. Lavrov : Transparent proxy revived after year coma.
- * Andi Kleen : Replace ip_reply with ip_send_reply.
- * Andi Kleen : Split fast and slow ip_build_xmit path
- * for decreased register pressure on x86
-	 *					and more readability.
- * Marc Boucher : When call_out_firewall returns FW_QUEUE,
- * silently drop skb instead of failing with -EPERM.
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/config.h>
-
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-
-#include <net/snmp.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-#include <net/icmp.h>
-#include <net/raw.h>
-#include <net/checksum.h>
-#include <linux/igmp.h>
-#include <linux/ip_fw.h>
-#include <linux/firewall.h>
-#include <linux/mroute.h>
-#include <linux/netlink.h>
-
-/*
- * Shall we try to damage output packets if routing dev changes?
- */
-
-int sysctl_ip_dynaddr = 0;
-
-
-int ip_id_count = 0;
-
-/* Generate a checksum for an outgoing IP datagram. */
-__inline__ void ip_send_check(struct iphdr *iph)
-{
- iph->check = 0;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-}
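-
-/* Worked example: a bare 20-byte header has iph->ihl == 5, i.e. five
-   32-bit words for ip_fast_csum to fold into the 16-bit one's
-   complement checksum.  The check field is zeroed first because it is
-   itself covered by the sum. */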
-
-/*
- * Add an ip header to a skbuff and send it out.
- */
-void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
- u32 saddr, u32 daddr, struct ip_options *opt)
-{
- struct rtable *rt = (struct rtable *)skb->dst;
- struct iphdr *iph;
- struct device *dev;
-
- /* Build the IP header. */
- if (opt)
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
- else
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
-
- iph->version = 4;
- iph->ihl = 5;
- iph->tos = sk->ip_tos;
- iph->frag_off = 0;
- if (ip_dont_fragment(sk, &rt->u.dst))
- iph->frag_off |= htons(IP_DF);
- iph->ttl = sk->ip_ttl;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
- iph->protocol = sk->protocol;
- iph->tot_len = htons(skb->len);
- iph->id = htons(ip_id_count++);
- skb->nh.iph = iph;
-
- if (opt && opt->optlen) {
- iph->ihl += opt->optlen>>2;
- ip_options_build(skb, opt, daddr, rt, 0);
- }
-
- dev = rt->u.dst.dev;
-
-#ifdef CONFIG_FIREWALL
- /* Now we have no better mechanism to notify about error. */
- switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
- case FW_REJECT:
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- /* Fall thru... */
- case FW_BLOCK:
- case FW_QUEUE:
- kfree_skb(skb);
- return;
- }
-#endif
-
- ip_send_check(iph);
-
- /* Send it out. */
- skb->dst->output(skb);
- return;
-}
-
-int __ip_finish_output(struct sk_buff *skb)
-{
- return ip_finish_output(skb);
-}
-
-int ip_mc_output(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct rtable *rt = (struct rtable*)skb->dst;
- struct device *dev = rt->u.dst.dev;
-
- /*
- * If the indicated interface is up and running, send the packet.
- */
-
- ip_statistics.IpOutRequests++;
-#ifdef CONFIG_IP_ROUTE_NAT
- if (rt->rt_flags & RTCF_NAT)
- ip_do_nat(skb);
-#endif
-
- skb->dev = dev;
- skb->protocol = __constant_htons(ETH_P_IP);
-
- /*
- * Multicasts are looped back for other local users
- */
-
- if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
-#ifdef CONFIG_IP_MROUTE
-		/* Small optimization: do not loop back non-local frames
-		   which returned after forwarding; they will be dropped
-		   by ip_mr_input in any case.
-		   Note that local frames are looped back to be delivered
- to local recipients.
-
- This check is duplicated in ip_mr_input at the moment.
- */
- if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
-#endif
- dev_loopback_xmit(skb);
-
- /* Multicasts with ttl 0 must not go beyond the host */
-
- if (skb->nh.iph->ttl == 0) {
- kfree_skb(skb);
- return 0;
- }
- }
-
- if (rt->rt_flags&RTCF_BROADCAST)
- dev_loopback_xmit(skb);
-
- return ip_finish_output(skb);
-}
-
-int ip_output(struct sk_buff *skb)
-{
-	char *str1 = "pfinet ip_output check point 1\n";
-	int stderr_fd = fileno (stderr);
- int ret;
-
-#ifdef CONFIG_IP_ROUTE_NAT
- struct rtable *rt = (struct rtable*)skb->dst;
-#endif
-
- ip_statistics.IpOutRequests++;
-
-#ifdef CONFIG_IP_ROUTE_NAT
- if (rt->rt_flags&RTCF_NAT)
- ip_do_nat(skb);
-#endif
-
-	/* Write only the message bytes, not the terminating NUL; fflush
-	   is unnecessary after a raw write. */
-	write (stderr_fd, str1, strlen (str1));
- ret = ip_finish_output(skb);
- return ret;
-}
-
-/* Queues a packet to be sent, and starts the transmitter if necessary.
- * This routine also needs to put in the total length and compute the
- * checksum. We used to do this in two stages, ip_build_header() then
- * this, but that scheme created a mess when routes disappeared etc.
- * So we do it all here, and the TCP send engine has been changed to
- * match. (No more unroutable FIN disasters, etc. wheee...) This will
- * most likely make other reliable transport layers above IP easier
- * to implement under Linux.
- */
-void ip_queue_xmit(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct ip_options *opt = sk->opt;
- struct rtable *rt;
- struct device *dev;
- struct iphdr *iph;
- unsigned int tot_len;
-
- /* Make sure we can route this packet. */
- rt = (struct rtable *) sk->dst_cache;
- if(rt == NULL || rt->u.dst.obsolete) {
- u32 daddr;
-
- sk->dst_cache = NULL;
- ip_rt_put(rt);
-
- /* Use correct destination address if we have options. */
- daddr = sk->daddr;
- if(opt && opt->srr)
- daddr = opt->faddr;
-
-		/* If this fails, the retransmit mechanism of the transport
-		 * layer will keep trying until a route appears or the
-		 * connection times itself out.
- */
- if(ip_route_output(&rt, daddr, sk->saddr,
- RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
- sk->bound_dev_if))
- goto drop;
- sk->dst_cache = &rt->u.dst;
- }
- if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
- goto no_route;
-
- /* We have a route, so grab a reference. */
- skb->dst = dst_clone(sk->dst_cache);
-
- /* OK, we know where to send it, allocate and build IP header. */
- iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
- iph->version = 4;
- iph->ihl = 5;
- iph->tos = sk->ip_tos;
- iph->frag_off = 0;
- iph->ttl = sk->ip_ttl;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
- iph->protocol = sk->protocol;
- skb->nh.iph = iph;
-	/* The transport layer sets skb->h.foo itself. */
-
- if(opt && opt->optlen) {
- iph->ihl += opt->optlen >> 2;
- ip_options_build(skb, opt, sk->daddr, rt, 0);
- }
-
- tot_len = skb->len;
- iph->tot_len = htons(tot_len);
- iph->id = htons(ip_id_count++);
-
- dev = rt->u.dst.dev;
-
-#ifdef CONFIG_FIREWALL
- /* Now we have no better mechanism to notify about error. */
- switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
- case FW_REJECT:
- start_bh_atomic();
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- end_bh_atomic();
- /* Fall thru... */
- case FW_BLOCK:
- case FW_QUEUE:
- goto drop;
- }
-#endif
-
- /* This can happen when the transport layer has segments queued
- * with a cached route, and by the time we get here things are
- * re-routed to a device with a different MTU than the original
- * device. Sick, but we must cover it.
- */
- if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
- kfree_skb(skb);
- if (skb2 == NULL)
- return;
- if (sk)
- skb_set_owner_w(skb2, sk);
- skb = skb2;
- iph = skb->nh.iph;
- }
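-
-	/* The (x + 15) & ~15 above just rounds the link-layer header room
-	   up to a multiple of 16 bytes; e.g. Ethernet's 14-byte header
-	   reserves 16 bytes of headroom. */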
-
-	/* Do we need to fragment? Again this is inefficient. We
- * need to somehow lock the original buffer and use bits of it.
- */
- if (tot_len > rt->u.dst.pmtu)
- goto fragment;
-
- if (ip_dont_fragment(sk, &rt->u.dst))
- iph->frag_off |= __constant_htons(IP_DF);
-
- /* Add an IP checksum. */
- ip_send_check(iph);
-
- skb->priority = sk->priority;
- skb->dst->output(skb);
- return;
-
-fragment:
- if (ip_dont_fragment(sk, &rt->u.dst) &&
- tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) {
-		/* Reject the packet ONLY if TCP might fragment
-		   it itself, if it were careful enough.
-		   The test is not precise (e.g. it does not take SACKs
-		   into account). Actually, tcp should handle it. --ANK (980801)
- */
- iph->frag_off |= __constant_htons(IP_DF);
- NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n"));
-
-		/* icmp_send is not reentrant, hence the bh_atomic... --ANK */
- start_bh_atomic();
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(rt->u.dst.pmtu));
- end_bh_atomic();
- goto drop;
- }
- ip_fragment(skb, skb->dst->output);
- return;
-
-no_route:
- sk->dst_cache = NULL;
- ip_rt_put(rt);
- ip_statistics.IpOutNoRoutes++;
- /* Fall through... */
-drop:
- kfree_skb(skb);
-}
-
-/*
- * Build and send a packet, with as little as one copy
- *
- * Doesn't care much about ip options... option length can be
- * different for fragment at 0 and other fragments.
- *
- * Note that the fragment at the highest offset is sent first,
- * so the getfrag routine can fill in the TCP/UDP checksum header
- * field in the last fragment it sends... actually it also helps
- * the reassemblers, they can put most packets in at the head of
- * the fragment queue, and they know the total size in advance. This
- * last feature will measurably improve the Linux fragment handler one
- * day.
- *
- *	The callback has four args, an arbitrary pointer (copy of frag),
- *	the destination buffer (char *), the offset to copy from, and the
- *	length to be copied.
- */
-
-int ip_build_xmit_slow(struct sock *sk,
- int getfrag (const void *,
- char *,
- unsigned int,
- unsigned int),
- const void *frag,
- unsigned length,
- struct ipcm_cookie *ipc,
- struct rtable *rt,
- int flags)
-{
- unsigned int fraglen, maxfraglen, fragheaderlen;
- int err;
- int offset, mf;
- int mtu;
- unsigned short id;
-
- int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
- int nfrags=0;
- struct ip_options *opt = ipc->opt;
- int df = 0;
-
- mtu = rt->u.dst.pmtu;
- if (ip_dont_fragment(sk, &rt->u.dst))
- df = htons(IP_DF);
-
- length -= sizeof(struct iphdr);
-
- if (opt) {
- fragheaderlen = sizeof(struct iphdr) + opt->optlen;
- maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
- } else {
- fragheaderlen = sizeof(struct iphdr);
-
- /*
- * Fragheaderlen is the size of 'overhead' on each buffer. Now work
- * out the size of the frames to send.
- */
-
- maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
- }
-
- if (length + fragheaderlen > 0xFFFF) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
- return -EMSGSIZE;
- }
-
- /*
- * Start at the end of the frame by handling the remainder.
- */
-
- offset = length - (length % (maxfraglen - fragheaderlen));
-
- /*
- * Amount of memory to allocate for final fragment.
- */
-
- fraglen = length - offset + fragheaderlen;
-
- if (length-offset==0) {
- fraglen = maxfraglen;
- offset -= maxfraglen-fragheaderlen;
- }
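-
-	/* Worked example (no options, mtu 1500): fragheaderlen = 20 and
-	   maxfraglen = ((1500 - 20) & ~7) + 20 = 1500, so the data stride
-	   is 1480 bytes.  For length = 4000, offset starts at
-	   4000 - (4000 % 1480) = 2960 with fraglen = 4000 - 2960 + 20 = 1060;
-	   the loop below then emits the fragments at offsets 2960, 1480
-	   and 0, highest offset first. */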
-
-
- /*
- * The last fragment will not have MF (more fragments) set.
- */
-
- mf = 0;
-
- /*
- * Don't fragment packets for path mtu discovery.
- */
-
- if (offset > 0 && df) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
- return(-EMSGSIZE);
- }
-
- /*
- * Lock the device lists.
- */
-
- dev_lock_list();
-
- /*
- * Get an identifier
- */
-
- id = htons(ip_id_count++);
-
- /*
- * Begin outputting the bytes.
- */
-
- do {
- char *data;
- struct sk_buff * skb;
-
- /*
- * Get the memory we require with some space left for alignment.
- */
-
- skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &err);
- if (skb == NULL)
- goto error;
-
- /*
- * Fill in the control structures
- */
-
- skb->priority = sk->priority;
- skb->dst = dst_clone(&rt->u.dst);
- skb_reserve(skb, hh_len);
-
- /*
- * Find where to start putting bytes.
- */
-
- data = skb_put(skb, fraglen);
- skb->nh.iph = (struct iphdr *)data;
-
- /*
- * Only write IP header onto non-raw packets
- */
-
- {
- struct iphdr *iph = (struct iphdr *)data;
-
- iph->version = 4;
- iph->ihl = 5;
- if (opt) {
- iph->ihl += opt->optlen>>2;
- ip_options_build(skb, opt,
- ipc->addr, rt, offset);
- }
- iph->tos = sk->ip_tos;
- iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
- iph->id = id;
- iph->frag_off = htons(offset>>3);
- iph->frag_off |= mf|df;
- if (rt->rt_type == RTN_MULTICAST)
- iph->ttl = sk->ip_mc_ttl;
- else
- iph->ttl = sk->ip_ttl;
- iph->protocol = sk->protocol;
- iph->check = 0;
- iph->saddr = rt->rt_src;
- iph->daddr = rt->rt_dst;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
- data += iph->ihl*4;
-
- /*
- * Any further fragments will have MF set.
- */
-
- mf = htons(IP_MF);
- }
-
- /*
- * User data callback
- */
-
- if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
- err = -EFAULT;
- kfree_skb(skb);
- goto error;
- }
-
- offset -= (maxfraglen-fragheaderlen);
- fraglen = maxfraglen;
-
- nfrags++;
-
-#ifdef CONFIG_FIREWALL
- switch (call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb)) {
- case FW_QUEUE:
- kfree_skb(skb);
- continue;
- case FW_BLOCK:
- case FW_REJECT:
- kfree_skb(skb);
- err = -EPERM;
- goto error;
- }
-#endif
-
- err = -ENETDOWN;
- if (rt->u.dst.output(skb))
- goto error;
- } while (offset >= 0);
-
- if (nfrags>1)
- ip_statistics.IpFragCreates += nfrags;
- dev_unlock_list();
- return 0;
-
-error:
- ip_statistics.IpOutDiscards++;
- if (nfrags>1)
- ip_statistics.IpFragCreates += nfrags;
- dev_unlock_list();
- return err;
-}
-
-
-/*
- * Fast path for unfragmented packets.
- */
-int ip_build_xmit(struct sock *sk,
- int getfrag (const void *,
- char *,
- unsigned int,
- unsigned int),
- const void *frag,
- unsigned length,
- struct ipcm_cookie *ipc,
- struct rtable *rt,
- int flags)
-{
- int err;
- struct sk_buff *skb;
- int df;
- struct iphdr *iph;
-
- /*
- * Try the simple case first. This leaves fragmented frames, and by
-	 *	choice RAW frames within 20 bytes of maximum size (rare) to the long path
- */
-
- if (!sk->ip_hdrincl) {
- length += sizeof(struct iphdr);
-
- /*
- * Check for slow path.
- */
- if (length > rt->u.dst.pmtu || ipc->opt != NULL)
- return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
- } else {
- if (length > rt->u.dst.dev->mtu) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
- return -EMSGSIZE;
- }
- }
-
- /*
- * Do path mtu discovery if needed.
- */
- df = 0;
- if (ip_dont_fragment(sk, &rt->u.dst))
- df = htons(IP_DF);
-
- /*
- * Fast path for unfragmented frames without options.
- */
- {
- int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
-
- skb = sock_alloc_send_skb(sk, length+hh_len+15,
- 0, flags&MSG_DONTWAIT, &err);
- if(skb==NULL)
- goto error;
- skb_reserve(skb, hh_len);
- }
-
- skb->priority = sk->priority;
- skb->dst = dst_clone(&rt->u.dst);
-
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
-
- dev_lock_list();
-
- if(!sk->ip_hdrincl) {
- iph->version=4;
- iph->ihl=5;
- iph->tos=sk->ip_tos;
- iph->tot_len = htons(length);
- iph->id=htons(ip_id_count++);
- iph->frag_off = df;
- iph->ttl=sk->ip_mc_ttl;
- if (rt->rt_type != RTN_MULTICAST)
- iph->ttl=sk->ip_ttl;
- iph->protocol=sk->protocol;
- iph->saddr=rt->rt_src;
- iph->daddr=rt->rt_dst;
- iph->check=0;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
- err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
- }
- else
- err = getfrag(frag, (void *)iph, 0, length);
-
- dev_unlock_list();
-
- if (err)
- goto error_fault;
-
-#ifdef CONFIG_FIREWALL
- switch (call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb)) {
- case FW_QUEUE:
- kfree_skb(skb);
- return 0;
- case FW_BLOCK:
- case FW_REJECT:
- kfree_skb(skb);
- err = -EPERM;
- goto error;
- }
-#endif
-
- return rt->u.dst.output(skb);
-
-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
-error:
- ip_statistics.IpOutDiscards++;
- return err;
-}
-
-
-
-/*
- * This IP datagram is too large to be sent in one piece. Break it up into
- * smaller pieces (each of size equal to IP header plus
- * a block of the data of the original IP data part) that will yet fit in a
- * single device frame, and queue such a frame for sending.
- *
- * Yes this is inefficient, feel free to submit a quicker one.
- */
-
-void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
-{
- struct iphdr *iph;
- unsigned char *raw;
- unsigned char *ptr;
- struct device *dev;
- struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len;
- int offset;
- int not_last_frag;
- struct rtable *rt = (struct rtable*)skb->dst;
-
- dev = rt->u.dst.dev;
-
- /*
- * Point into the IP datagram header.
- */
-
- raw = skb->nh.raw;
- iph = (struct iphdr*)raw;
-
- /*
- * Setup starting values.
- */
-
- hlen = iph->ihl * 4;
- left = ntohs(iph->tot_len) - hlen; /* Space per frame */
- mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
- ptr = raw + hlen; /* Where to start from */
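-
-	/* Worked example: tot_len = 4020 with ihl = 5 gives hlen = 20,
-	   left = 4000 and, for a path MTU of 1500, mtu = 1480.  The loop
-	   below emits fragments carrying 1480, 1480 and 1040 data bytes,
-	   with frag_off values 0, 185 and 370 (byte offsets 0, 1480 and
-	   2960, shifted right by 3). */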
-
- /*
- * The protocol doesn't seem to say what to do in the case that the
-	 *	frame + options doesn't fit the mtu. As it used to fall down dead
-	 *	in this case, we were fortunate it didn't happen.
- *
- * It is impossible, because mtu>=68. --ANK (980801)
- */
-
-#ifdef CONFIG_NET_PARANOIA
- if (mtu<8)
- goto fail;
-#endif
-
- /*
- * Fragment the datagram.
- */
-
- offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
- not_last_frag = iph->frag_off & htons(IP_MF);
-
- /*
- * Keep copying data until we run out.
- */
-
- while(left > 0) {
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
-		/* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
- /*
- * Allocate buffer.
- */
-
- if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
- NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
- goto fail;
- }
-
- /*
- * Set up data on packet
- */
-
- skb2->pkt_type = skb->pkt_type;
- skb2->priority = skb->priority;
- skb_reserve(skb2, (dev->hard_header_len+15)&~15);
- skb_put(skb2, len + hlen);
- skb2->nh.raw = skb2->data;
- skb2->h.raw = skb2->data + hlen;
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
-
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- skb2->dst = dst_clone(skb->dst);
-
- /*
- * Copy the packet header into the new buffer.
- */
-
- memcpy(skb2->nh.raw, raw, hlen);
-
- /*
- * Copy a block of the IP datagram.
- */
- memcpy(skb2->h.raw, ptr, len);
- left -= len;
-
- /*
- * Fill in the new header fields.
- */
- iph = skb2->nh.iph;
- iph->frag_off = htons((offset >> 3));
-
- /* ANK: dirty, but effective trick. Upgrade options only if
- * the segment to be fragmented was THE FIRST (otherwise,
- * options are already fixed) and make it ONCE
- * on the initial skb, so that all the following fragments
- * will inherit fixed options.
- */
- if (offset == 0)
- ip_options_fragment(skb);
-
- /*
- * Added AC : If we are fragmenting a fragment that's not the
-		 *	last fragment then keep MF set on each fragment
- */
- if (left > 0 || not_last_frag)
- iph->frag_off |= htons(IP_MF);
- ptr += len;
- offset += len;
-
- /*
- * Put this fragment into the sending queue.
- */
-
- ip_statistics.IpFragCreates++;
-
- iph->tot_len = htons(len + hlen);
-
- ip_send_check(iph);
-
- output(skb2);
- }
- kfree_skb(skb);
- ip_statistics.IpFragOKs++;
- return;
-
-fail:
- kfree_skb(skb);
- ip_statistics.IpFragFails++;
-}
-
-/*
- * Fetch data from kernel space and fill in checksum if needed.
- */
-static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
- unsigned int fraglen)
-{
- struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
- u16 *pktp = (u16 *)to;
- struct iovec *iov;
- int len;
- int hdrflag = 1;
-
- iov = &dp->iov[0];
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- hdrflag = 0;
- }
- len = iov->iov_len - offset;
- if (fraglen > len) { /* overlapping. */
- dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
- dp->csum);
- offset = 0;
- fraglen -= len;
- to += len;
- iov++;
- }
-
- dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
- dp->csum);
-
- if (hdrflag && dp->csumoffset)
- *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
- return 0;
-}
-
-/*
- * Generic function to send a packet as reply to another packet.
- * Used to send TCP resets so far. ICMP should use this function too.
- *
- * Should run single threaded per socket because it uses the sock
- * structure to pass arguments.
- */
-void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
- unsigned int len)
-{
- struct {
- struct ip_options opt;
- char data[40];
- } replyopts;
- struct ipcm_cookie ipc;
- u32 daddr;
- struct rtable *rt = (struct rtable*)skb->dst;
-
- if (ip_options_echo(&replyopts.opt, skb))
- return;
-
- sk->ip_tos = skb->nh.iph->tos;
- sk->priority = skb->priority;
- sk->protocol = skb->nh.iph->protocol;
-
- daddr = ipc.addr = rt->rt_src;
- ipc.opt = &replyopts.opt;
-
- if (ipc.opt->srr)
- daddr = replyopts.opt.faddr;
- if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
- return;
-
- /* And let IP do all the hard work. */
- ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
- ip_rt_put(rt);
-}
-
-/*
- * IP protocol layer initialiser
- */
-
-static struct packet_type ip_packet_type =
-{
- __constant_htons(ETH_P_IP),
- NULL, /* All devices */
- ip_rcv,
- NULL,
- NULL,
-};
-
-
-
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_MULTICAST
-static struct proc_dir_entry proc_net_igmp = {
- PROC_NET_IGMP, 4, "igmp",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- ip_mc_procinfo
-};
-#endif
-#endif
-
-/*
- * IP registers the packet type and then calls the subprotocol initialisers
- */
-
-__initfunc(void ip_init(void))
-{
- dev_add_pack(&ip_packet_type);
-
- ip_rt_init();
-
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_MULTICAST
- proc_net_register(&proc_net_igmp);
-#endif
-#endif
-}
-
diff --git a/pfinet.old/linux-src/net/ipv4/tcp_input.c~ b/pfinet.old/linux-src/net/ipv4/tcp_input.c~
deleted file mode 100644
index c5095624..00000000
--- a/pfinet.old/linux-src/net/ipv4/tcp_input.c~
+++ /dev/null
@@ -1,2449 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * Implementation of the Transmission Control Protocol(TCP).
- *
- * Version: $Id: tcp_input.c,v 1.164.2.8 1999/09/23 19:21:23 davem Exp $
- *
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Mark Evans, <evansmp@uhura.aston.ac.uk>
- * Corey Minyard <wf-rch!minyard@relay.EU.net>
- * Florian La Roche, <flla@stud.uni-sb.de>
- * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
- * Linus Torvalds, <torvalds@cs.helsinki.fi>
- * Alan Cox, <gw4pts@gw4pts.ampr.org>
- * Matthew Dillon, <dillon@apollo.west.oic.com>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Jorge Cwik, <jorge@laser.satlink.net>
- */
-
-/*
- * Changes:
- * Pedro Roque : Fast Retransmit/Recovery.
- * Two receive queues.
- * Retransmit queue handled by TCP.
- * Better retransmit timer handling.
- * New congestion avoidance.
- * Header prediction.
- * Variable renaming.
- *
- * Eric : Fast Retransmit.
- * Randy Scott : MSS option defines.
- * Eric Schenk : Fixes to slow start algorithm.
- * Eric Schenk : Yet another double ACK bug.
- * Eric Schenk : Delayed ACK bug fixes.
- * Eric Schenk : Floyd style fast retrans war avoidance.
- * David S. Miller : Don't allow zero congestion window.
- * Eric Schenk : Fix retransmitter so that it sends
- * next packet on ack of previous packet.
- * Andi Kleen : Moved open_request checking here
- * and process RSTs for open_requests.
- * Andi Kleen : Better prune_queue, and other fixes.
- *		Andrey Savochkin:	Fix RTT measurements in the presence of
- * timestamps.
- * Andrey Savochkin: Check sequence numbers correctly when
- * removing SACKs due to in sequence incoming
- * data segments.
- * Andi Kleen: Make sure we never ack data there is not
- * enough room for. Also make this condition
- * a fatal error if it might still happen.
- * Andi Kleen: Add tcp_measure_rcv_mss to make
- * connections with MSS<min(MTU,ann. MSS)
- * work without delayed acks.
- * Andi Kleen: Process packets with PSH set in the
- * fast path.
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <net/tcp.h>
-#include <linux/ipsec.h>
-
-#ifdef CONFIG_SYSCTL
-#define SYNC_INIT 0 /* let the user enable it */
-#else
-#define SYNC_INIT 1
-#endif
-
-extern int sysctl_tcp_fin_timeout;
-
-/* These are on by default so the code paths get tested.
- * For the final 2.2 this may be undone at our discretion. -DaveM
- */
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-
-int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-
-static int prune_queue(struct sock *sk);
-
-/* There is something which you must keep in mind when you analyze the
- * behavior of the tp->ato delayed ack timeout interval. When a
- * connection starts up, we want to ack as quickly as possible. The
- * problem is that "good" TCP's do slow start at the beginning of data
- * transmission. This means that until we send the first few ACK's the
- * sender will sit on his end and only queue most of his data, because
- * he can only send snd_cwnd unacked packets at any given time. For
- * each ACK we send, he increments snd_cwnd and transmits more of his
- * queue. -DaveM
- */
-static void tcp_delack_estimator(struct tcp_opt *tp)
-{
- if(tp->ato == 0) {
- tp->lrcvtime = tcp_time_stamp;
-
- /* Help sender leave slow start quickly,
- * and also makes sure we do not take this
- * branch ever again for this connection.
- */
- tp->ato = 1;
- tcp_enter_quickack_mode(tp);
- } else {
- int m = tcp_time_stamp - tp->lrcvtime;
-
- tp->lrcvtime = tcp_time_stamp;
- if(m <= 0)
- m = 1;
- if(m > tp->rto)
- tp->ato = tp->rto;
- else {
- /* This funny shift makes sure we
- * clear the "quick ack mode" bit.
- */
- tp->ato = ((tp->ato << 1) >> 2) + m;
- }
- }
-}
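-
-/* The "funny shift" above halves ato while discarding bit 31, which
-   holds the quick-ack flag (presumably set by tcp_enter_quickack_mode;
-   see the 0x80000000 masks in tcp_remember_ack below), then adds the
-   new inter-arrival gap m. */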
-
-/*
- * Remember to send an ACK later.
- */
-static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
- struct sk_buff *skb)
-{
- tp->delayed_acks++;
-
-	/* Tiny-grams with PSH set artificially deflate our
- * ato measurement, but with a lower bound.
- */
- if(th->psh && (skb->len < (tp->mss_cache >> 1))) {
- /* Preserve the quickack state. */
- if((tp->ato & 0x7fffffff) > HZ/50)
- tp->ato = ((tp->ato & 0x80000000) |
- (HZ/50));
- }
-}
-
-/* Called to compute a smoothed rtt estimate. The data fed to this
- * routine either comes from timestamps, or from segments that were
- * known _not_ to have been retransmitted [see Karn/Partridge
- * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
- * piece by Van Jacobson.
- * NOTE: the next three routines used to be one big routine.
- * To save cycles in the RFC 1323 implementation it was better to break
- * it up into three procedures. -- erics
- */
-
-static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
-{
- long m = mrtt; /* RTT */
-
- /* The following amusing code comes from Jacobson's
- * article in SIGCOMM '88. Note that rtt and mdev
- * are scaled versions of rtt and mean deviation.
- * This is designed to be as fast as possible
- * m stands for "measurement".
- *
-	 * In a 1990 paper the rto value is changed to:
- * RTO = rtt + 4 * mdev
- */
- if(m == 0)
- m = 1;
- if (tp->srtt != 0) {
- m -= (tp->srtt >> 3); /* m is now error in rtt est */
- tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
- if (m < 0)
- m = -m; /* m is now abs(error) */
- m -= (tp->mdev >> 2); /* similar update on mdev */
- tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
- } else {
- /* no previous measure. */
- tp->srtt = m<<3; /* take the measured time to be rtt */
- tp->mdev = m<<2; /* make sure rto = 3*rtt */
- }
-}
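-
-/* Worked numbers: srtt is stored scaled by 8 and mdev by 4.  A first
-   measurement m = 80 ticks yields srtt = 640, mdev = 320.  A second
-   measurement m = 120 gives m -= srtt>>3, i.e. 40, so srtt becomes 680
-   (85 ticks smoothed); then m -= mdev>>2 leaves -40, so mdev becomes
-   280 (70 ticks). */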
-
-/* Calculate rto without backoff. This is the second half of Van Jacobson's
- * routine referred to above.
- */
-
-static __inline__ void tcp_set_rto(struct tcp_opt *tp)
-{
- tp->rto = (tp->srtt >> 3) + tp->mdev;
- tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
-}
-
-
-/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
- * on packet lifetime in the internet. We need the HZ/5 lower
- * bound to behave correctly against BSD stacks with a fixed
- * delayed ack.
- * FIXME: It's not entirely clear this lower bound is the best
- * way to avoid the problem. Is it possible to drop the lower
- * bound and still avoid trouble with BSD stacks? Perhaps
- * some modification to the RTO calculation that takes delayed
- * ack bias into account? This needs serious thought. -- erics
- */
-static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
-{
- if (tp->rto > 120*HZ)
- tp->rto = 120*HZ;
- if (tp->rto < HZ/5)
- tp->rto = HZ/5;
-}
-
-/* WARNING: this must not be called if tp->saw_timestamp was false. */
-extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp,
- __u32 start_seq, __u32 end_seq)
-{
-	/* This is start_seq <= last_ack_seq combined
-	   with an in-window check. If start_seq <= last_ack_seq <= rcv_nxt,
-	   then the segment is in window if end_seq >= rcv_nxt.
- */
- if (!after(start_seq, tp->last_ack_sent) &&
- !before(end_seq, tp->rcv_nxt)) {
- /* PAWS bug workaround wrt. ACK frames, the PAWS discard
- * extra check below makes sure this can only happen
- * for pure ACK frames. -DaveM
- *
- * Plus: expired timestamps.
- *
- * Plus: resets failing PAWS.
- */
- if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) {
- tp->ts_recent = tp->rcv_tsval;
- tp->ts_recent_stamp = tcp_time_stamp;
- }
- }
-}
-
-#define PAWS_24DAYS (HZ * 60 * 60 * 24 * 24)
-
-extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len)
-{
- return ((s32)(tp->rcv_tsval - tp->ts_recent) < 0 &&
- (s32)(tcp_time_stamp - tp->ts_recent_stamp) < PAWS_24DAYS &&
- /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM */
- len != (th->doff * 4));
-}
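-
-/* In words: discard the segment when its timestamp is older than the
-   last one we kept (a signed 32-bit comparison, so wraparound is
-   handled), the kept timestamp is fresher than the 24-day PAWS window,
-   and the segment is not a pure ACK (the DaveM caveat above). */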
-
-
-static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
-{
- u32 end_window = tp->rcv_wup + tp->rcv_wnd;
-
- if (tp->rcv_wnd &&
- after(end_seq, tp->rcv_nxt) &&
- before(seq, end_window))
- return 1;
- if (seq != end_window)
- return 0;
- return (seq == end_seq);
-}
-
-/* This function checks to see if the tcp header is actually acceptable. */
-extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
-{
- if (seq == tp->rcv_nxt)
- return (tp->rcv_wnd || (end_seq == seq));
-
- return __tcp_sequence(tp, seq, end_seq);
-}
-
-/* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
-{
- sk->zapped = 1;
-
- /* We want the right error as BSD sees it (and indeed as we do). */
- switch (sk->state) {
- case TCP_SYN_SENT:
- sk->err = ECONNREFUSED;
- break;
- case TCP_CLOSE_WAIT:
- sk->err = EPIPE;
- break;
- default:
- sk->err = ECONNRESET;
- };
- tcp_set_state(sk, TCP_CLOSE);
- sk->shutdown = SHUTDOWN_MASK;
- if (!sk->dead)
- sk->state_change(sk);
-}
-
-/* This tags the retransmission queue when SACKs arrive. */
-static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int i = nsacks;
-
- while(i--) {
- struct sk_buff *skb = skb_peek(&sk->write_queue);
- __u32 start_seq = ntohl(sp->start_seq);
- __u32 end_seq = ntohl(sp->end_seq);
- int fack_count = 0;
-
- while((skb != NULL) &&
- (skb != tp->send_head) &&
- (skb != (struct sk_buff *)&sk->write_queue)) {
- /* The retransmission queue is always in order, so
- * we can short-circuit the walk early.
- */
- if(after(TCP_SKB_CB(skb)->seq, end_seq))
- break;
-
-			/* We play it conservative: we don't allow SACKs to partially
-			 * tag a sequence space.
- */
- fack_count++;
- if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
- !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
- /* If this was a retransmitted frame, account for it. */
- if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
- tp->retrans_out)
- tp->retrans_out--;
- TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
-
- /* RULE: All new SACKs will either decrease retrans_out
- * or advance fackets_out.
- */
- if(fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
- }
- skb = skb->next;
- }
- sp++; /* Move on to the next SACK block. */
- }
-}
-
-/* Look for tcp options. Normally only called on SYN and SYNACK packets.
- * But, this can also be called on packets in the established flow when
- * the fast version below fails.
- */
-void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
-{
- unsigned char *ptr;
- int length=(th->doff*4)-sizeof(struct tcphdr);
- int saw_mss = 0;
-
- ptr = (unsigned char *)(th + 1);
- tp->saw_tstamp = 0;
-
- while(length>0) {
- int opcode=*ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
- switch(opcode) {
- case TCPOPT_MSS:
- if(opsize==TCPOLEN_MSS && th->syn) {
- u16 in_mss = ntohs(*(__u16 *)ptr);
- if (in_mss == 0)
- in_mss = 536;
- if (tp->mss_clamp > in_mss)
- tp->mss_clamp = in_mss;
- saw_mss = 1;
- }
- break;
- case TCPOPT_WINDOW:
- if(opsize==TCPOLEN_WINDOW && th->syn)
- if (!no_fancy && sysctl_tcp_window_scaling) {
- tp->wscale_ok = 1;
- tp->snd_wscale = *(__u8 *)ptr;
- if(tp->snd_wscale > 14) {
- if(net_ratelimit())
- printk("tcp_parse_options: Illegal window "
- "scaling value %d >14 received.",
- tp->snd_wscale);
- tp->snd_wscale = 14;
- }
- }
- break;
- case TCPOPT_TIMESTAMP:
- if(opsize==TCPOLEN_TIMESTAMP) {
- if (sysctl_tcp_timestamps && !no_fancy) {
- tp->tstamp_ok = 1;
- tp->saw_tstamp = 1;
- tp->rcv_tsval = ntohl(*(__u32 *)ptr);
- tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
- }
- }
- break;
- case TCPOPT_SACK_PERM:
- if(opsize==TCPOLEN_SACK_PERM && th->syn) {
- if (sysctl_tcp_sack && !no_fancy) {
- tp->sack_ok = 1;
- tp->num_sacks = 0;
- }
- }
- break;
-
- case TCPOPT_SACK:
- if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
- sysctl_tcp_sack && (sk != NULL) && !th->syn) {
- int sack_bytes = opsize - TCPOLEN_SACK_BASE;
-
- if(!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) {
- int num_sacks = sack_bytes >> 3;
- struct tcp_sack_block *sackp;
-
- sackp = (struct tcp_sack_block *)ptr;
- tcp_sacktag_write_queue(sk, sackp, num_sacks);
- }
- }
- };
- ptr+=opsize-2;
- length-=opsize;
- };
- }
- if(th->syn && saw_mss == 0)
- tp->mss_clamp = 536;
-}
-
-/* Fast parse options. This hopes to only see timestamps.
- * If it is wrong it falls back on tcp_parse_options().
- */
-static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp)
-{
- /* If we didn't send out any options ignore them all. */
- if (tp->tcp_header_len == sizeof(struct tcphdr))
- return 0;
- if (th->doff == sizeof(struct tcphdr)>>2) {
- tp->saw_tstamp = 0;
- return 0;
- } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
- __u32 *ptr = (__u32 *)(th + 1);
- if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
- tp->saw_tstamp = 1;
- tp->rcv_tsval = ntohl(*++ptr);
- tp->rcv_tsecr = ntohl(*++ptr);
- return 1;
- }
- }
- tcp_parse_options(sk, th, tp, 0);
- return 1;
-}
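-
-/* The predicted header above is the RFC 1323 appendix A layout:
-   NOP, NOP, TIMESTAMP, length 10.  With TCPOPT_NOP = 1,
-   TCPOPT_TIMESTAMP = 8 and TCPOLEN_TIMESTAMP = 10 that first option
-   word is 0x0101080a, one aligned 12-byte (three-word) block. */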
-
-#define FLAG_DATA 0x01 /* Incoming frame contained data. */
-#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
-#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
-#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
-
-static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
-{
- if (tp->dup_acks > 3)
- tp->snd_cwnd = (tp->snd_ssthresh);
-
- tp->dup_acks = 0;
-}
-
-/* NOTE: This code assumes that tp->dup_acks gets cleared when a
- * retransmit timer fires.
- */
-static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Note: If not_dup is set this implies we got a
- * data carrying packet or a window update.
- * This carries no new information about possible
- * lost packets, so we have to ignore it for the purposes
- * of counting duplicate acks. Ideally this does not imply we
- * should stop our fast retransmit phase, more acks may come
- * later without data to help us. Unfortunately this would make
- * the code below much more complex. For now if I see such
- * a packet I clear the fast retransmit phase.
- */
- if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) {
- /* This is the standard reno style fast retransmit branch. */
-
- /* 1. When the third duplicate ack is received, set ssthresh
- * to one half the current congestion window, but no less
- * than two segments. Retransmit the missing segment.
- */
- if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
- tp->dup_acks++;
- if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
- tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
- tp->snd_cwnd = (tp->snd_ssthresh + 3);
- tp->high_seq = tp->snd_nxt;
- if(!tp->fackets_out)
- tcp_retransmit_skb(sk,
- skb_peek(&sk->write_queue));
- else
- tcp_fack_retransmit(sk);
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- }
- } else if (++tp->dup_acks > 3) {
- /* 2. Each time another duplicate ACK arrives, increment
- * cwnd by the segment size. [...] Transmit a packet...
- *
- * Packet transmission will be done on normal flow processing
- * since we're not in "retransmit mode". We do not use
- * duplicate ACKs to artificially inflate the congestion
- * window when doing FACK.
- */
- if(!tp->fackets_out) {
- tp->snd_cwnd++;
- } else {
- /* Fill any further holes which may have
- * appeared.
- *
- * We may want to change this to run every
- * further multiple-of-3 dup ack increments,
- * to be more robust against out-of-order
- * packet delivery. -DaveM
- */
- tcp_fack_retransmit(sk);
- }
- }
- } else if (tp->high_seq != 0) {
- /* In this branch we deal with clearing the Floyd style
- * block on duplicate fast retransmits, and if requested
- * we do Hoe style secondary fast retransmits.
- */
- if (!before(ack, tp->high_seq) || (not_dup & FLAG_DATA) != 0) {
-		/* Once we have acked all the packets up to high_seq
-		 * we are done with this fast retransmit phase.
-		 * Alternatively data arrived. In this case we
-		 * have to abort the fast retransmit attempt.
- * Note that we do want to accept a window
- * update since this is expected with Hoe's algorithm.
- */
- clear_fast_retransmit(tp);
-
- /* After we have cleared up to high_seq we can
- * clear the Floyd style block.
- */
- if (!before(ack, tp->high_seq)) {
- tp->high_seq = 0;
- tp->fackets_out = 0;
- }
- } else if (tp->dup_acks >= 3) {
- if (!tp->fackets_out) {
- /* Hoe Style. We didn't ack the whole
- * window. Take this as a cue that
- * another packet was lost and retransmit it.
- * Don't muck with the congestion window here.
- * Note that we have to be careful not to
- * act if this was a window update and it
- * didn't ack new data, since this does
- * not indicate a packet left the system.
- * We can test this by just checking
- * if ack changed from snd_una, since
- * the only way to get here without advancing
- * from snd_una is if this was a window update.
- */
- if (ack != tp->snd_una && before(ack, tp->high_seq)) {
- tcp_retransmit_skb(sk,
- skb_peek(&sk->write_queue));
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- }
- } else {
- /* FACK style, fill any remaining holes in
- * receiver's queue.
- */
- tcp_fack_retransmit(sk);
- }
- }
- }
-}
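-
-/* Reno recovery in numbers, per the rules quoted above: on the third
-   duplicate ACK, ssthresh drops to half the current window but no less
-   than two segments (tcp_recalc_ssthresh), snd_cwnd is reinflated to
-   ssthresh + 3, and later duplicate ACKs add one segment each, until an
-   ACK at or beyond high_seq ends the episode via clear_fast_retransmit. */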
-
-/* This is Jacobson's slow start and congestion avoidance.
- * SIGCOMM '88, p. 328.
- */
-static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
-{
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* In "safe" area, increase. */
- tp->snd_cwnd++;
- } else {
- /* In dangerous area, increase slowly.
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
- */
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt=0;
- } else
- tp->snd_cwnd_cnt++;
- }
-}
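-
-/* E.g. with snd_ssthresh = 8: while snd_cwnd <= 8 every new ACK grows
-   the window by a full segment (exponential growth per round trip);
-   above that, snd_cwnd_cnt must reach snd_cwnd first, so a window of 10
-   needs 10 ACKs per increment, roughly one segment per RTT. */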
-
-/* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
- __u32 *seq, __u32 *seq_rtt)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
- __u32 now = tcp_time_stamp;
- int acked = 0;
-
- /* If we are retransmitting, and this ACK clears up to
- * the retransmit head, or further, then clear our state.
- */
- if (tp->retrans_head != NULL &&
- !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq))
- tp->retrans_head = NULL;
-
- while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
- struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- __u8 sacked = scb->sacked;
-
- /* If our packet is before the ack sequence we can
- * discard it as it's confirmed to have arrived at
- * the other end.
- */
- if (after(scb->end_seq, ack))
- break;
-
- /* Initial outgoing SYN's get put onto the write_queue
- * just like anything else we transmit. It is not
- * true data, and if we misinform our callers that
- * this ACK acks real data, we will erroneously exit
- * connection startup slow start one packet too
- * quickly. This is severely frowned upon behavior.
- */
- if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out)
- tp->retrans_out--;
- if(!(scb->flags & TCPCB_FLAG_SYN)) {
- acked |= FLAG_DATA_ACKED;
- if(sacked & TCPCB_SACKED_RETRANS)
- acked |= FLAG_RETRANS_DATA_ACKED;
- if(tp->fackets_out)
- tp->fackets_out--;
- } else {
- /* This is pure paranoia. */
- tp->retrans_head = NULL;
- }
- tp->packets_out--;
- *seq = scb->seq;
- *seq_rtt = now - scb->when;
- __skb_unlink(skb, skb->list);
- kfree_skb(skb);
- }
- return acked;
-}
-
-static void tcp_ack_probe(struct sock *sk, __u32 ack)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Our probe was answered. */
- tp->probes_out = 0;
-
- /* Was it a usable window open? */
-
- /* should always be non-null */
- if (tp->send_head != NULL &&
- !before (ack + tp->snd_wnd, TCP_SKB_CB(tp->send_head)->end_seq)) {
- tp->backoff = 0;
- tp->pending = 0;
- tcp_clear_xmit_timer(sk, TIME_PROBE0);
- } else {
- tcp_reset_xmit_timer(sk, TIME_PROBE0,
- min(tp->rto << tp->backoff, 120*HZ));
- }
-}
-
-/* Should we open up the congestion window? */
-static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag)
-{
- /* Data must have been acked. */
- if ((flag & FLAG_DATA_ACKED) == 0)
- return 0;
-
- /* Some of the data acked was retransmitted somehow? */
- if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) {
- /* We advance in all cases except during
- * non-FACK fast retransmit/recovery.
- */
- if (tp->fackets_out != 0 ||
- tp->retransmits != 0)
- return 1;
-
-		/* Non-FACK fast retransmit does its own
- * congestion window management, don't get
- * in the way.
- */
- return 0;
- }
-
- /* New non-retransmitted data acked, always advance. */
- return 1;
-}
-
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
- u32 seq, u32 ack, int flag)
-{
- __u32 seq_rtt;
-
- /* RTTM Rule: A TSecr value received in a segment is used to
- * update the averaged RTT measurement only if the segment
- * acknowledges some new data, i.e., only if it advances the
- * left edge of the send window.
- *
- * See draft-ietf-tcplw-high-performance-00, section 3.3.
- * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
- */
- if (!(flag & FLAG_DATA_ACKED))
- return;
-
- seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
- tcp_rtt_estimator(tp, seq_rtt);
- if (tp->retransmits) {
- if (tp->packets_out == 0) {
- tp->retransmits = 0;
- tp->fackets_out = 0;
- tp->retrans_out = 0;
- tp->backoff = 0;
- tcp_set_rto(tp);
- } else {
- /* Still retransmitting, use backoff */
- tcp_set_rto(tp);
- tp->rto = tp->rto << tp->backoff;
- }
- } else {
- tcp_set_rto(tp);
- }
-
- tcp_bound_rto(tp);
-}
-
-static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
-{
- struct sk_buff *skb = skb_peek(&sk->write_queue);
-
- /* Some data was ACK'd, if still retransmitting (due to a
- * timeout), resend more of the retransmit queue. The
- * congestion window is handled properly by that code.
- */
- if (tp->retransmits) {
- tcp_xmit_retransmit_queue(sk);
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- } else {
- __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
- if ((__s32)when < 0)
- when = 1;
- tcp_reset_xmit_timer(sk, TIME_RETRANS, when);
- }
-}
-
-/* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct tcphdr *th,
- u32 ack_seq, u32 ack, int len)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int flag = 0;
- u32 seq = 0;
- u32 seq_rtt = 0;
-
- if(sk->zapped)
- return(1); /* Dead, can't ack any more so why bother */
-
- if (tp->pending == TIME_KEEPOPEN)
- tp->probes_out = 0;
-
- tp->rcv_tstamp = tcp_time_stamp;
-
-	/* If the ack is newer than anything we sent, or older than
-	 * previous acks, then we can probably ignore it.
- */
- if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
- goto uninteresting_ack;
-
-	/* If the segment carries data, set FLAG_DATA. */
- if (len != th->doff*4) {
- flag |= FLAG_DATA;
- tcp_delack_estimator(tp);
- }
-
- /* Update our send window. */
-
- /* This is the window update code as per RFC 793
- * snd_wl{1,2} are used to prevent unordered
- * segments from shrinking the window
- */
- if (before(tp->snd_wl1, ack_seq) ||
- (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
- u32 nwin = ntohs(th->window) << tp->snd_wscale;
-
- if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
- flag |= FLAG_WIN_UPDATE;
- tp->snd_wnd = nwin;
-
- tp->snd_wl1 = ack_seq;
- tp->snd_wl2 = ack;
-
- if (nwin > tp->max_window)
- tp->max_window = nwin;
- }
- }
-
- /* We passed data and got it acked, remove any soft error
- * log. Something worked...
- */
- sk->err_soft = 0;
-
- /* If this ack opens up a zero window, clear backoff. It was
- * being used to time the probes, and is probably far higher than
- * it needs to be for normal retransmission.
- */
- if (tp->pending == TIME_PROBE0)
- tcp_ack_probe(sk, ack);
-
- /* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
-
- /* We must do this here, before code below clears out important
- * state contained in tp->fackets_out and tp->retransmits. -DaveM
- */
- if (should_advance_cwnd(tp, flag))
- tcp_cong_avoid(tp);
-
- /* If we have a timestamp, we always do rtt estimates. */
- if (tp->saw_tstamp) {
- tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
- } else {
-		/* If we were retransmitting, don't count the rtt estimate. */
- if (tp->retransmits) {
- if (tp->packets_out == 0) {
- tp->retransmits = 0;
- tp->fackets_out = 0;
- tp->retrans_out = 0;
- }
- } else {
- /* We don't have a timestamp. Can only use
- * packets that are not retransmitted to determine
- * rtt estimates. Also, we must not reset the
- * backoff for rto until we get a non-retransmitted
- * packet. This allows us to deal with a situation
- * where the network delay has increased suddenly.
- * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
- */
- if (flag & FLAG_DATA_ACKED) {
- if(!(flag & FLAG_RETRANS_DATA_ACKED)) {
- tp->backoff = 0;
- tcp_rtt_estimator(tp, seq_rtt);
- tcp_set_rto(tp);
- tcp_bound_rto(tp);
- }
- }
- }
- }
-
- if (tp->packets_out) {
- if (flag & FLAG_DATA_ACKED)
- tcp_ack_packets_out(sk, tp);
- } else {
- tcp_clear_xmit_timer(sk, TIME_RETRANS);
- }
-
- flag &= (FLAG_DATA | FLAG_WIN_UPDATE);
- if ((ack == tp->snd_una && tp->packets_out && flag == 0) ||
- (tp->high_seq != 0)) {
- tcp_fast_retrans(sk, ack, flag);
- } else {
- /* Clear any aborted fast retransmit starts. */
- tp->dup_acks = 0;
- }
- /* It is not a brain fart, I thought a bit now. 8)
- *
- * Forward progress is indicated, if:
- * 1. the ack acknowledges new data.
-	 * 2. or the ack is a duplicate, but it is caused by new segment
- * arrival. This case is filtered by:
- * - it contains no data, syn or fin.
- * - it does not update window.
- * 3. or new SACK. It is difficult to check, so that we ignore it.
- *
-	 * Forward progress is also indicated by the arrival of new data,
-	 * which was caused by a window open from our side. This case is more
-	 * difficult and is handled (alas, incorrectly) in tcp_data_queue().
- * --ANK (990513)
- */
- if (ack != tp->snd_una || (flag == 0 && !th->fin))
- dst_confirm(sk->dst_cache);
-
- /* Remember the highest ack received. */
- tp->snd_una = ack;
- return 1;
-
-uninteresting_ack:
- SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt);
- return 0;
-}
-
-/* New-style handling of TIME_WAIT sockets. */
-extern void tcp_tw_schedule(struct tcp_tw_bucket *tw);
-extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw);
-extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
-
-void tcp_timewait_kill(struct tcp_tw_bucket *tw)
-{
- struct tcp_bind_bucket *tb = tw->tb;
-
- /* Disassociate with bind bucket. */
- if(tw->bind_next)
- tw->bind_next->bind_pprev = tw->bind_pprev;
- *(tw->bind_pprev) = tw->bind_next;
- if (tb->owners == NULL) {
- if (tb->next)
- tb->next->pprev = tb->pprev;
- *(tb->pprev) = tb->next;
- kmem_cache_free(tcp_bucket_cachep, tb);
- }
-
- /* Unlink from established hashes. */
- if(tw->next)
- tw->next->pprev = tw->pprev;
- *tw->pprev = tw->next;
-
- /* We decremented the prot->inuse count when we entered TIME_WAIT
- * and the sock from which this came was destroyed.
- */
- tw->sklist_next->sklist_prev = tw->sklist_prev;
- tw->sklist_prev->sklist_next = tw->sklist_next;
-
- /* Ok, now free it up. */
- kmem_cache_free(tcp_timewait_cachep, tw);
-}
-
-/* We come here as a special case from the AF-specific TCP input processing,
- * and the SKB has no owner. Essentially handling this is very simple:
- * we just keep silently eating rx'd packets, acking them if necessary,
- * until none show up for the entire timeout period.
- *
- * Return 0, TCP_TW_ACK, TCP_TW_RST
- */
-enum tcp_tw_status
-tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
-{
- /* RFC 1122:
- * "When a connection is [...] on TIME-WAIT state [...]
- * [a TCP] MAY accept a new SYN from the remote TCP to
- * reopen the connection directly, if it:
- *
- * (1) assigns its initial sequence number for the new
- * connection to be larger than the largest sequence
- * number it used on the previous connection incarnation,
- * and
- *
- * (2) returns to TIME-WAIT state if the SYN turns out
- * to be an old duplicate".
- */
- if(th->syn && !th->rst && after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt)) {
- struct sock *sk;
- struct tcp_func *af_specific = tw->af_specific;
- __u32 isn;
-
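-		/* The new ISN is picked a fixed offset (128000) above the
-		 * old connection's snd_nxt, which satisfies condition (1)
-		 * quoted above; zero is then skipped, presumably because 0
-		 * is treated as "no ISN" elsewhere.
-		 */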
- isn = tw->snd_nxt + 128000;
- if(isn == 0)
- isn++;
- tcp_tw_deschedule(tw);
- tcp_timewait_kill(tw);
- sk = af_specific->get_sock(skb, th);
- if(sk == NULL ||
- !ipsec_sk_policy(sk,skb) ||
- atomic_read(&sk->sock_readers) != 0)
- return 0;
- skb_set_owner_r(skb, sk);
- af_specific = sk->tp_pinfo.af_tcp.af_specific;
- if(af_specific->conn_request(sk, skb, isn) < 0)
- return TCP_TW_RST; /* Toss a reset back. */
- return 0; /* Discard the frame. */
- }
-
- /* Check RST or SYN */
- if(th->rst || th->syn) {
-		/* This is TIME_WAIT assassination, in two flavors.
- * Oh well... nobody has a sufficient solution to this
- * protocol bug yet.
- */
- if(sysctl_tcp_rfc1337 == 0) {
- tcp_tw_deschedule(tw);
- tcp_timewait_kill(tw);
- }
- if(!th->rst)
- return TCP_TW_RST; /* toss a reset back */
- return 0;
- } else {
- /* In this case we must reset the TIMEWAIT timer. */
- if(th->ack)
- tcp_tw_reschedule(tw);
- }
- /* Ack old packets if necessary */
- if (!after(TCP_SKB_CB(skb)->end_seq, tw->rcv_nxt) &&
- (th->doff * 4) > len)
- return TCP_TW_ACK;
- return 0;
-}
-
-/* Enter the time wait state. This is always called from BH
- * context. Essentially we whip up a timewait bucket, copy the
- * relevant info into it from the SK, and mess with hash chains
- * and list linkage.
- */
-static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
-{
- struct sock **head, *sktw;
-
- /* Step 1: Remove SK from established hash. */
- if(sk->next)
- sk->next->pprev = sk->pprev;
- *sk->pprev = sk->next;
- sk->pprev = NULL;
- tcp_reg_zap(sk);
-
- /* Step 2: Put TW into bind hash where SK was. */
- tw->tb = (struct tcp_bind_bucket *)sk->prev;
- if((tw->bind_next = sk->bind_next) != NULL)
- sk->bind_next->bind_pprev = &tw->bind_next;
- tw->bind_pprev = sk->bind_pprev;
- *sk->bind_pprev = (struct sock *)tw;
- sk->prev = NULL;
-
- /* Step 3: Same for the protocol sklist. */
- (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
- (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
- sk->sklist_next = NULL;
- sk->prot->inuse--;
-
- /* Step 4: Hash TW into TIMEWAIT half of established hash table. */
- head = &tcp_ehash[sk->hashent + (tcp_ehash_size/2)];
- sktw = (struct sock *)tw;
- if((sktw->next = *head) != NULL)
- (*head)->pprev = &sktw->next;
- *head = sktw;
- sktw->pprev = head;
-}
-
-void tcp_time_wait(struct sock *sk)
-{
- struct tcp_tw_bucket *tw;
-
- tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
- if(tw != NULL) {
- /* Give us an identity. */
- tw->daddr = sk->daddr;
- tw->rcv_saddr = sk->rcv_saddr;
- tw->bound_dev_if= sk->bound_dev_if;
- tw->num = sk->num;
- tw->state = TCP_TIME_WAIT;
- tw->sport = sk->sport;
- tw->dport = sk->dport;
- tw->family = sk->family;
- tw->reuse = sk->reuse;
- tw->rcv_nxt = sk->tp_pinfo.af_tcp.rcv_nxt;
- tw->snd_nxt = sk->tp_pinfo.af_tcp.snd_nxt;
- tw->window = tcp_select_window(sk);
- tw->af_specific = sk->tp_pinfo.af_tcp.af_specific;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- if(tw->family == PF_INET6) {
- memcpy(&tw->v6_daddr,
- &sk->net_pinfo.af_inet6.daddr,
- sizeof(struct in6_addr));
- memcpy(&tw->v6_rcv_saddr,
- &sk->net_pinfo.af_inet6.rcv_saddr,
- sizeof(struct in6_addr));
- }
-#endif
- /* Linkage updates. */
- tcp_tw_hashdance(sk, tw);
-
- /* Get the TIME_WAIT timeout firing. */
- tcp_tw_schedule(tw);
-
- /* CLOSE the SK. */
- if(sk->state == TCP_ESTABLISHED)
- tcp_statistics.TcpCurrEstab--;
- sk->state = TCP_CLOSE;
- net_reset_timer(sk, TIME_DONE,
- min(sk->tp_pinfo.af_tcp.srtt * 2, TCP_DONE_TIME));
- } else {
- /* Sorry, we're out of memory, just CLOSE this
- * socket up. We've got bigger problems than
- * non-graceful socket closings.
- */
- tcp_set_state(sk, TCP_CLOSE);
- }
-
- /* Prevent rcvmsg/sndmsg calls, and wake people up. */
- sk->shutdown = SHUTDOWN_MASK;
- if(!sk->dead)
- sk->state_change(sk);
-}
-
-/*
- * Process the FIN bit. This now behaves as it is supposed to work
- * and the FIN takes effect when it is validly part of the sequence
- * space, not earlier (as happens when there are holes).
- *
- * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
- * (and thence onto LAST-ACK and finally, CLOSE, we never enter
- * TIME-WAIT)
- *
- * If we are in FINWAIT-1, a received FIN indicates simultaneous
- * close and we go into CLOSING (and later onto TIME-WAIT)
- *
- * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
- */
-
-static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
-{
- sk->tp_pinfo.af_tcp.fin_seq = TCP_SKB_CB(skb)->end_seq;
-
- tcp_send_ack(sk);
-
- if (!sk->dead) {
- sk->state_change(sk);
- sock_wake_async(sk->socket, 1);
- }
-
- switch(sk->state) {
- case TCP_SYN_RECV:
- case TCP_ESTABLISHED:
- /* Move to CLOSE_WAIT */
- tcp_set_state(sk, TCP_CLOSE_WAIT);
- if (th->rst)
- sk->shutdown = SHUTDOWN_MASK;
- break;
-
- case TCP_CLOSE_WAIT:
- case TCP_CLOSING:
- /* Received a retransmission of the FIN, do
- * nothing.
- */
- break;
- case TCP_LAST_ACK:
- /* RFC793: Remain in the LAST-ACK state. */
- break;
-
- case TCP_FIN_WAIT1:
- /* This case occurs when a simultaneous close
-		 * happens: we must ack the received FIN and
- * enter the CLOSING state.
- *
- * This causes a WRITE timeout, which will either
- * move on to TIME_WAIT when we timeout, or resend
- * the FIN properly (maybe we get rid of that annoying
- * FIN lost hang). The TIME_WRITE code is already
- * correct for handling this timeout.
- */
- tcp_set_state(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- /* Received a FIN -- send ACK and enter TIME_WAIT. */
- tcp_time_wait(sk);
- break;
- default:
- /* Only TCP_LISTEN and TCP_CLOSE are left, in these
- * cases we should never reach this piece of code.
- */
- printk("tcp_fin: Impossible, sk->state=%d\n", sk->state);
- break;
- };
-}
-
-/* These routines update the SACK block as out-of-order packets arrive or
- * in-order packets close up the sequence space.
- */
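-/* For example: if the head block covers 1000-2300 after being extended
- * by a new out-of-order skb, and another block covers 2300-3000, the
- * two now touch; tcp_sack_maybe_coalesce() below merges them into a
- * single 1000-3000 block and drops the redundant one.
- */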
-static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp)
-{
- int this_sack, num_sacks = tp->num_sacks;
- struct tcp_sack_block *swalk = &tp->selective_acks[0];
-
- /* If more than one SACK block, see if the recent change to SP eats into
-	 * or hits the sequence space of other SACK blocks; if so, coalesce.
- */
- if(num_sacks != 1) {
- for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) {
- if(swalk == sp)
- continue;
-
- /* First case, bottom of SP moves into top of the
- * sequence space of SWALK.
- */
- if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) {
- sp->start_seq = swalk->start_seq;
- goto coalesce;
- }
- /* Second case, top of SP moves into bottom of the
- * sequence space of SWALK.
- */
- if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) {
- sp->end_seq = swalk->end_seq;
- goto coalesce;
- }
- }
- }
- /* SP is the only SACK, or no coalescing cases found. */
- return;
-
-coalesce:
- /* Zap SWALK, by moving every further SACK up by one slot.
- * Decrease num_sacks.
- */
- for(; this_sack < num_sacks-1; this_sack++, swalk++) {
- struct tcp_sack_block *next = (swalk + 1);
- swalk->start_seq = next->start_seq;
- swalk->end_seq = next->end_seq;
- }
- tp->num_sacks--;
-}
-
-static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
-{
- __u32 tmp;
-
- tmp = sack1->start_seq;
- sack1->start_seq = sack2->start_seq;
- sack2->start_seq = tmp;
-
- tmp = sack1->end_seq;
- sack1->end_seq = sack2->end_seq;
- sack2->end_seq = tmp;
-}
-
-static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct tcp_sack_block *sp = &tp->selective_acks[0];
- int cur_sacks = tp->num_sacks;
-
- if (!cur_sacks)
- goto new_sack;
-
- /* Optimize for the common case, new ofo frames arrive
- * "in order". ;-) This also satisfies the requirements
- * of RFC2018 about ordering of SACKs.
- */
- if(sp->end_seq == TCP_SKB_CB(skb)->seq) {
- sp->end_seq = TCP_SKB_CB(skb)->end_seq;
- tcp_sack_maybe_coalesce(tp, sp);
- } else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) {
- /* Re-ordered arrival, in this case, can be optimized
- * as well.
- */
- sp->start_seq = TCP_SKB_CB(skb)->seq;
- tcp_sack_maybe_coalesce(tp, sp);
- } else {
- struct tcp_sack_block *swap = sp + 1;
- int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);
-
- /* Oh well, we have to move things around.
- * Try to find a SACK we can tack this onto.
- */
-
- for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
- if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
- (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
- if(swap->end_seq == TCP_SKB_CB(skb)->seq)
- swap->end_seq = TCP_SKB_CB(skb)->end_seq;
- else
- swap->start_seq = TCP_SKB_CB(skb)->seq;
- tcp_sack_swap(sp, swap);
- tcp_sack_maybe_coalesce(tp, sp);
- return;
- }
- }
-
- /* Could not find an adjacent existing SACK, build a new one,
- * put it at the front, and shift everyone else down. We
- * always know there is at least one SACK present already here.
- *
- * If the sack array is full, forget about the last one.
- */
- if (cur_sacks >= max_sacks) {
- cur_sacks--;
- tp->num_sacks--;
- }
- while(cur_sacks >= 1) {
- struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
- struct tcp_sack_block *prev = (this - 1);
- this->start_seq = prev->start_seq;
- this->end_seq = prev->end_seq;
- cur_sacks--;
- }
-
- new_sack:
- /* Build the new head SACK, and we're done. */
- sp->start_seq = TCP_SKB_CB(skb)->seq;
- sp->end_seq = TCP_SKB_CB(skb)->end_seq;
- tp->num_sacks++;
- }
-}
-
-static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb)
-{
- struct tcp_sack_block *sp = &tp->selective_acks[0];
- int num_sacks = tp->num_sacks;
- int this_sack;
-
- /* This is an in order data segment _or_ an out-of-order SKB being
- * moved to the receive queue, so we know this removed SKB will eat
- * from the front of a SACK.
- */
- for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
- /* Check if the start of the sack is covered by skb. */
- if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) &&
- before(sp->start_seq, TCP_SKB_CB(skb)->end_seq))
- break;
- }
-
- /* This should only happen if so many SACKs get built that some get
- * pushed out before we get here, or we eat some in sequence packets
- * which are before the first SACK block.
- */
- if(this_sack >= num_sacks)
- return;
-
- sp->start_seq = TCP_SKB_CB(skb)->end_seq;
- if(!before(sp->start_seq, sp->end_seq)) {
- /* Zap this SACK, by moving forward any other SACKS. */
- for(this_sack += 1; this_sack < num_sacks; this_sack++, sp++) {
- struct tcp_sack_block *next = (sp + 1);
- sp->start_seq = next->start_seq;
- sp->end_seq = next->end_seq;
- }
- tp->num_sacks--;
- }
-}
-
-static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct sk_buff *new_skb)
-{
- struct tcp_sack_block *sp = &tp->selective_acks[0];
- int num_sacks = tp->num_sacks;
- int this_sack;
-
- for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
- if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)
- break;
- }
- if(this_sack >= num_sacks)
- return;
- sp->end_seq = TCP_SKB_CB(new_skb)->end_seq;
-}
-
-/* This one checks to see if we can put data from the
- * out_of_order queue into the receive_queue.
- */
-static void tcp_ofo_queue(struct sock *sk)
-{
- struct sk_buff *skb;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- while ((skb = skb_peek(&tp->out_of_order_queue))) {
- if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
- break;
-
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
- SOCK_DEBUG(sk, "ofo packet was already received \n");
- __skb_unlink(skb, skb->list);
- kfree_skb(skb);
- continue;
- }
- SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
-
- if(tp->sack_ok)
- tcp_sack_remove_skb(tp, skb);
- __skb_unlink(skb, skb->list);
- __skb_queue_tail(&sk->receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->h.th->fin)
- tcp_fin(skb, sk, skb->h.th);
- }
-}
-
-static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
-{
- struct sk_buff *skb1;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Queue data for delivery to the user.
- * Packets in sequence go to the receive queue.
- * Out of sequence packets to the out_of_order_queue.
- */
- if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
- /* Ok. In sequence. */
- queue_and_out:
- dst_confirm(sk->dst_cache);
- __skb_queue_tail(&sk->receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->h.th->fin) {
- tcp_fin(skb, sk, skb->h.th);
- } else {
- tcp_remember_ack(tp, skb->h.th, skb);
- }
- /* This may have eaten into a SACK block. */
- if(tp->sack_ok && tp->num_sacks)
- tcp_sack_remove_skb(tp, skb);
- tcp_ofo_queue(sk);
-
- /* Turn on fast path. */
- if (skb_queue_len(&tp->out_of_order_queue) == 0)
- tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) |
- (0x10 << 16) |
- tp->snd_wnd);
- return;
- }
-
- /* An old packet, either a retransmit or some packet got lost. */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
-		/* A retransmit, 2nd most common case. Force an immediate ack. */
- SOCK_DEBUG(sk, "retransmit received: seq %X\n", TCP_SKB_CB(skb)->seq);
- tcp_enter_quickack_mode(tp);
- kfree_skb(skb);
- return;
- }
-
- if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- /* Partial packet, seq < rcv_next < end_seq */
- SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
-
- goto queue_and_out;
- }
-
- /* Ok. This is an out_of_order segment, force an ack. */
- tp->delayed_acks++;
- tcp_enter_quickack_mode(tp);
-
- /* Disable header prediction. */
- tp->pred_flags = 0;
-
- SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
- if (skb_peek(&tp->out_of_order_queue) == NULL) {
- /* Initial out of order segment, build 1 SACK. */
- if(tp->sack_ok) {
- tp->num_sacks = 1;
- tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq;
- }
- __skb_queue_head(&tp->out_of_order_queue,skb);
- } else {
- for(skb1=tp->out_of_order_queue.prev; ; skb1 = skb1->prev) {
- /* Already there. */
- if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb1)->seq) {
- if (skb->len >= skb1->len) {
- if(tp->sack_ok)
- tcp_sack_extend(tp, skb1, skb);
- __skb_append(skb1, skb);
- __skb_unlink(skb1, skb1->list);
- kfree_skb(skb1);
- } else {
- /* A duplicate, smaller than what is in the
- * out-of-order queue right now, toss it.
- */
- kfree_skb(skb);
- }
- break;
- }
-
- if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) {
- __skb_append(skb1, skb);
- if(tp->sack_ok)
- tcp_sack_new_ofo_skb(sk, skb);
- break;
- }
-
- /* See if we've hit the start. If so insert. */
- if (skb1 == skb_peek(&tp->out_of_order_queue)) {
- __skb_queue_head(&tp->out_of_order_queue,skb);
- if(tp->sack_ok)
- tcp_sack_new_ofo_skb(sk, skb);
- break;
- }
- }
- }
-}
-
-
-/*
- * This routine handles the data. If there is room in the buffer,
- * it will already have been moved into it. If there is no
- * room, then we will just have to discard the packet.
- */
-
-static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
-{
- struct tcphdr *th;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- char *str1 = "pfinet: tcp_data check point 1.\n";
- char *str2 = "pfinet: tcp_data check point 2.\n";
- char *str3 = "pfinet: tcp_data check point 3.\n";
- int stderr_fd = fileno (stderr);
-
- th = skb->h.th;
- skb_pull(skb, th->doff*4);
- skb_trim(skb, len - (th->doff*4));
-
- if (skb->len == 0 && !th->fin)
- return(0);
-
-	write (stderr_fd, str1, strlen (str1));
- fflush (stderr);
- /*
- * If our receive queue has grown past its limits shrink it.
- * Make sure to do this before moving snd_nxt, otherwise
- * data might be acked for that we don't have enough room.
- */
- if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
- if (prune_queue(sk) < 0) {
- /* Still not enough room. That can happen when
- * skb->true_size differs significantly from skb->len.
- */
- return 0;
- }
- }
-
- tcp_data_queue(sk, skb);
-
-	write (stderr_fd, str2, strlen (str2));
- fflush (stderr);
- if (before(tp->rcv_nxt, tp->copied_seq)) {
- printk(KERN_DEBUG "*** tcp.c:tcp_data bug acked < copied\n");
- tp->rcv_nxt = tp->copied_seq;
- }
-
- /* Above, tcp_data_queue() increments delayed_acks appropriately.
- * Now tell the user we may have some data.
- */
- if (!sk->dead) {
- sk->data_ready(sk,0);
- }
-	write (stderr_fd, str3, strlen (str3));
- fflush (stderr);
- return(1);
-}
-
-static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
- tcp_packets_in_flight(tp) < tp->snd_cwnd) {
- /* Put more data onto the wire. */
- tcp_write_xmit(sk);
- } else if (tp->packets_out == 0 && !tp->pending) {
- /* Start probing the receivers window. */
- tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
- }
-}
-
-static __inline__ void tcp_data_snd_check(struct sock *sk)
-{
- struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
-
- if (skb != NULL)
- __tcp_data_snd_check(sk, skb);
-}
-
-/*
- * Adapt the MSS value used to make delayed ack decisions to the
- * real world.
- */
-static __inline__ void tcp_measure_rcv_mss(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- unsigned int len = skb->len, lss;
-
- if (len > tp->rcv_mss)
- tp->rcv_mss = len;
- lss = tp->last_seg_size;
- tp->last_seg_size = 0;
- if (len >= 536) {
- if (len == lss)
- tp->rcv_mss = len;
- tp->last_seg_size = len;
- }
-}
-
-/*
- * Check if sending an ack is needed.
- */
-static __inline__ void __tcp_ack_snd_check(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* This also takes care of updating the window.
- * This if statement needs to be simplified.
- *
- * Rules for delaying an ack:
- * - delay time <= 0.5 HZ
- * - we don't have a window update to send
- * - must send at least every 2 full sized packets
- * - must send an ACK if we have any out of order data
- *
-	 * With an extra heuristic to handle packet loss
-	 * situations and also to help the sender leave slow
-	 * start in an expedient manner.
- */
-
- /* Two full frames received or... */
- if (((tp->rcv_nxt - tp->rcv_wup) >= tp->rcv_mss * MAX_DELAY_ACK) ||
- /* We will update the window "significantly" or... */
- tcp_raise_window(sk) ||
- /* We entered "quick ACK" mode or... */
- tcp_in_quickack_mode(tp) ||
- /* We have out of order data */
- (skb_peek(&tp->out_of_order_queue) != NULL)) {
- /* Then ack it now */
- tcp_send_ack(sk);
- } else {
- /* Else, send delayed ack. */
- tcp_send_delayed_ack(tp, HZ/2);
- }
-}
-
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- if (tp->delayed_acks == 0) {
- /* We sent a data segment already. */
- return;
- }
- __tcp_ack_snd_check(sk);
-}
-
-
-/*
- * This routine is only called when we have urgent data
- * signalled. It's the 'slow' part of tcp_urg. It could be
- * moved inline now as tcp_urg is only called from one
- * place. We handle URGent data wrong. We have to - as
- * BSD still doesn't use the correction from RFC961.
- * For 1003.1g we should support a new option TCP_STDURG to permit
- * either form (or just set the sysctl tcp_stdurg).
- */
-
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- u32 ptr = ntohs(th->urg_ptr);
-
- if (ptr && !sysctl_tcp_stdurg)
- ptr--;
- ptr += ntohl(th->seq);
-
- /* Ignore urgent data that we've already seen and read. */
- if (after(tp->copied_seq, ptr))
- return;
-
- /* Do we already have a newer (or duplicate) urgent pointer? */
- if (tp->urg_data && !after(ptr, tp->urg_seq))
- return;
-
- /* Tell the world about our new urgent pointer. */
- if (sk->proc != 0) {
- if (sk->proc > 0)
- kill_proc(sk->proc, SIGURG, 1);
- else
- kill_pg(-sk->proc, SIGURG, 1);
- }
-
- /* We may be adding urgent data when the last byte read was
- * urgent. To do this requires some care. We cannot just ignore
- * tp->copied_seq since we would read the last urgent byte again
- * as data, nor can we alter copied_seq until this data arrives
-	 * or we break the semantics of SIOCATMARK (and thus sockatmark()).
- */
- if (tp->urg_seq == tp->copied_seq)
- tp->copied_seq++; /* Move the copied sequence on correctly */
- tp->urg_data = URG_NOTYET;
- tp->urg_seq = ptr;
-
- /* Disable header prediction. */
- tp->pred_flags = 0;
-}
-
-/* This is the 'fast' part of urgent handling. */
-static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Check if we get a new urgent pointer - normally not. */
- if (th->urg)
- tcp_check_urg(sk,th);
-
- /* Do we wait for any urgent data? - normally not... */
- if (tp->urg_data == URG_NOTYET) {
- u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4);
-
- /* Is the urgent pointer pointing into this packet? */
- if (ptr < len) {
- tp->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
- if (!sk->dead)
- sk->data_ready(sk,0);
- }
- }
-}
-
-/* Clean the out_of_order queue if we can, trying to get
- * the socket within its memory limits again.
- *
- * Return less than zero if we should start dropping frames
- * until the socket owning process reads some of the data
- * to stabilize the situation.
- */
-static int prune_queue(struct sock *sk)
-{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- struct sk_buff * skb;
-
- SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
- net_statistics.PruneCalled++;
-
- /* First, purge the out_of_order queue. */
- skb = __skb_dequeue_tail(&tp->out_of_order_queue);
- if(skb != NULL) {
- /* Free it all. */
-		do {
-			net_statistics.OfoPruned += skb->len;
- kfree_skb(skb);
- skb = __skb_dequeue_tail(&tp->out_of_order_queue);
- } while(skb != NULL);
-
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if(tp->sack_ok)
- tp->num_sacks = 0;
- }
-
- /* If we are really being abused, tell the caller to silently
- * drop receive data on the floor. It will get retransmitted
- * and hopefully then we'll have sufficient space.
- *
- * We used to try to purge the in-order packets too, but that
- * turns out to be deadly and fraught with races. Consider:
- *
- * 1) If we acked the data, we absolutely cannot drop the
- * packet. This data would then never be retransmitted.
- * 2) It is possible, with a proper sequence of events involving
- * delayed acks and backlog queue handling, to have the user
- * read the data before it gets acked. The previous code
-	 *    here got this wrong, and it led to data corruption.
-	 * 3) Too many state changes happen when the FIN arrives, so once
- * we've seen that we can't remove any in-order data safely.
- *
- * The net result is that removing in-order receive data is too
-	 * complex for anyone's sanity. So we don't do it anymore. But
- * if we are really having our buffer space abused we stop accepting
- * new receive data.
- */
- if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
- return 0;
-
- /* Massive buffer overcommit. */
- return -1;
-}
-
-/*
- * TCP receive function for the ESTABLISHED state.
- *
- * It is split into a fast path and a slow path. The fast path is
- * disabled when:
- * - A zero window was announced from us - zero window probing
- * is only handled properly in the slow path.
- * - Out of order segments arrived.
- * - Urgent data is expected.
- * - There is no buffer space left
- * - Unexpected TCP flags/window values/header lengths are received
- * (detected by checking the TCP header against pred_flags)
- * - Data is sent in both directions. Fast path only supports pure senders
- * or pure receivers (this means either the sequence number or the ack
- * value must stay constant)
- *
- * When these conditions are not satisfied it drops into a standard
- * receive procedure patterned after RFC793 to handle all cases.
- * The first three cases are guaranteed by proper pred_flags setting,
- * the rest is checked inline. Fast processing is turned on in
- * tcp_data_queue when everything is OK.
- */
-int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int queued;
- u32 flg;
- char *str1 = "pfinet tcp_rcv_established check point 1\n";
- char *str2 = "pfinet tcp_rcv_established check point 2\n";
- int stderr_fd = fileno (stderr);
-
- /*
- * Header prediction.
- * The code follows the one in the famous
- * "30 instruction TCP receive" Van Jacobson mail.
- *
- * Van's trick is to deposit buffers into socket queue
- * on a device interrupt, to call tcp_recv function
- * on the receive process context and checksum and copy
-	 * the buffer to user space. Smart...
- *
- * Our current scheme is not silly either but we take the
- * extra cost of the net_bh soft interrupt processing...
- * We do checksum and copy also but from device to kernel.
- */
-
- /*
- * RFC1323: H1. Apply PAWS check first.
- */
- if (tcp_fast_parse_options(sk, th, tp)) {
- if (tp->saw_tstamp) {
- if (tcp_paws_discard(tp, th, len)) {
- tcp_statistics.TcpInErrs++;
- if (!th->rst) {
- tcp_send_ack(sk);
- goto discard;
- }
- }
- tcp_replace_ts_recent(sk, tp,
- TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
- }
- }
-
- flg = *(((u32 *)th) + 3) & ~htonl(0xFC8 << 16);
-
- /* pred_flags is 0xS?10 << 16 + snd_wnd
-	 * if header_prediction is to be made
- * 'S' will always be tp->tcp_header_len >> 2
- * '?' will be 0 else it will be !0
- * (when there are holes in the receive
- * space for instance)
- * PSH flag is ignored.
- */
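-	/* A concrete instance (assuming timestamps were negotiated, so
-	 * that tp->tcp_header_len == 32): S == 32 >> 2 == 8, and the
-	 * word becomes
-	 *
-	 *   pred_flags == htonl((8 << 28) | (0x10 << 16) | snd_wnd)
-	 *
-	 * which matches the value rebuilt in tcp_data_queue() when the
-	 * fast path is turned back on.
-	 */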
-
- if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
- if (len <= th->doff*4) {
- /* Bulk data transfer: sender */
- if (len == th->doff*4) {
- tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->ack_seq, len);
- kfree_skb(skb);
- tcp_data_snd_check(sk);
- return 0;
- } else { /* Header too small */
- tcp_statistics.TcpInErrs++;
- goto discard;
- }
- } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
- atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
- /* Bulk data transfer: receiver */
- __skb_pull(skb,th->doff*4);
-
- tcp_measure_rcv_mss(sk, skb);
-
- /* DO NOT notify forward progress here.
-			 * It saves a dozen CPU instructions in the fast path. --ANK
- */
- __skb_queue_tail(&sk->receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-
- /* FIN bit check is not done since if FIN is set in
- * this frame, the pred_flags won't match up. -DaveM
- */
- sk->data_ready(sk, 0);
- tcp_delack_estimator(tp);
-
- tcp_remember_ack(tp, th, skb);
-
- __tcp_ack_snd_check(sk);
- return 0;
- }
- }
-
- /*
- * Standard slow path.
- */
-
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
- /* RFC793, page 37: "In all states except SYN-SENT, all reset
- * (RST) segments are validated by checking their SEQ-fields."
- * And page 69: "If an incoming segment is not acceptable,
- * an acknowledgment should be sent in reply (unless the RST bit
- * is set, if so drop the segment and return)".
- */
- if (th->rst)
- goto discard;
- if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n",
- TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tp->rcv_wup, tp->rcv_wnd);
- }
- tcp_send_ack(sk);
- goto discard;
- }
-
- if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
- SOCK_DEBUG(sk, "syn in established state\n");
- tcp_statistics.TcpInErrs++;
- tcp_reset(sk);
- return 1;
- }
-
- if(th->rst) {
- tcp_reset(sk);
- goto discard;
- }
-
- if(th->ack)
- tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len);
-
- /* Process urgent data. */
- tcp_urg(sk, th, len);
-
- /* step 7: process the segment text */
- queued = tcp_data(skb, sk, len);
-
- /* This must be after tcp_data() does the skb_pull() to
- * remove the header size from skb->len.
- *
- * Dave!!! Phrase above (and all about rcv_mss) has
- * nothing to do with reality. rcv_mss must measure TOTAL
- * size, including sacks, IP options etc. Hence, measure_rcv_mss
-	 * must occur before pulling etc, otherwise it will flap
- * like hell. Even putting it before tcp_data is wrong,
- * it should use skb->tail - skb->nh.raw instead.
- * --ANK (980805)
- *
- * BTW I broke it. Now all TCP options are handled equally
- * in mss_clamp calculations (i.e. ignored, rfc1122),
- * and mss_cache does include all of them (i.e. tstamps)
-	 * except for SACKs, to calculate the effective mss faster.
- * --ANK (980805)
- */
- tcp_measure_rcv_mss(sk, skb);
-
-	write (stderr_fd, str1, strlen (str1));
-	fflush (stderr);
- /* Be careful, tcp_data() may have put this into TIME_WAIT. */
- if(sk->state != TCP_CLOSE) {
- tcp_data_snd_check(sk);
- tcp_ack_snd_check(sk);
- }
-	write (stderr_fd, str2, strlen (str2));
-	fflush (stderr);
-
- if (!queued) {
- discard:
- kfree_skb(skb);
- }
-
- return 0;
-}
-
-/*
- * Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented
- * as an open_request.
- */
-
-struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
- struct open_request *req)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- u32 flg;
-
-	/* Assumption: the socket is not in use,
-	 * as we checked the user count in tcp_rcv and we're
- * running from a soft interrupt.
- */
-
- /* Check for syn retransmission */
- flg = *(((u32 *)skb->h.th) + 3);
-
- flg &= __constant_htonl(0x00170000);
- /* Only SYN set? */
- if (flg == __constant_htonl(0x00020000)) {
- if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
-			/* Retransmitted SYN. */
- req->class->rtx_syn_ack(sk, req);
- return NULL;
- } else {
- return sk; /* Pass new SYN to the listen socket. */
- }
- }
-
- /* We know it's an ACK here */
- if (req->sk) {
- /* socket already created but not
- * yet accepted()...
- */
- sk = req->sk;
- } else {
- /* In theory the packet could be for a cookie, but
- * TIME_WAIT should guard us against this.
- * XXX: Nevertheless check for cookies?
- * This sequence number check is done again later,
- * but we do it here to prevent syn flood attackers
- * from creating big SYN_RECV sockets.
- */
- if (!between(TCP_SKB_CB(skb)->ack_seq, req->snt_isn, req->snt_isn+1) ||
- !between(TCP_SKB_CB(skb)->seq, req->rcv_isn,
- req->rcv_isn+1+req->rcv_wnd)) {
- req->class->send_reset(skb);
- return NULL;
- }
-
- sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
- tcp_dec_slow_timer(TCP_SLT_SYNACK);
- if (sk == NULL)
- return NULL;
-
- req->expires = 0UL;
- req->sk = sk;
- }
- skb_orphan(skb);
- skb_set_owner_r(skb, sk);
- return sk;
-}
-
-/*
- * This function implements the receiving procedure of RFC 793 for
- * all states except ESTABLISHED and TIME_WAIT.
- * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
- * address independent.
- */
-
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int queued = 0;
-
- switch (sk->state) {
- case TCP_CLOSE:
- /* When state == CLOSED, hash lookup always fails.
- *
- * But, there is a back door, the backlog queue.
- * If we have a sequence of packets in the backlog
- * during __release_sock() which have a sequence such
- * that:
- * packet X causes entry to TCP_CLOSE state
- * ...
- * packet X + N has FIN bit set
- *
- * We report a (luckily) harmless error in this case.
- * The issue is that backlog queue processing bypasses
- * any hash lookups (we know which socket packets are for).
- * The correct behavior here is what 2.0.x did, since
- * a TCP_CLOSE socket does not exist. Drop the frame
- * and send a RST back to the other end.
- */
- return 1;
-
- case TCP_LISTEN:
- /* These use the socket TOS..
- * might want to be the received TOS
- */
- if(th->ack)
- return 1;
-
- if(th->syn) {
- if(tp->af_specific->conn_request(sk, skb, 0) < 0)
- return 1;
-
- /* Now we have several options: In theory there is
- * nothing else in the frame. KA9Q has an option to
- * send data with the syn, BSD accepts data with the
- * syn up to the [to be] advertised window and
- * Solaris 2.1 gives you a protocol error. For now
-			 * we just ignore it; that fits the spec precisely
- * and avoids incompatibilities. It would be nice in
- * future to drop through and process the data.
- *
- * Now that TTCP is starting to be used we ought to
- * queue this data.
- * But, this leaves one open to an easy denial of
- * service attack, and SYN cookies can't defend
- * against this problem. So, we drop the data
- * in the interest of security over speed.
- */
- goto discard;
- }
-
- goto discard;
- break;
-
- case TCP_SYN_SENT:
- /* SYN sent means we have to look for a suitable ack and
- * either reset for bad matches or go to connected.
- * The SYN_SENT case is unusual and should
- * not be in line code. [AC]
- */
- if(th->ack) {
- /* rfc793:
- * "If the state is SYN-SENT then
- * first check the ACK bit
- * If the ACK bit is set
- * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
- * a reset (unless the RST bit is set, if so drop
- * the segment and return)"
- *
- * I cite this place to emphasize one essential
-			 * detail: this check is different from the one
- * in established state: SND.UNA <= SEG.ACK <= SND.NXT.
- * SEG_ACK == SND.UNA == ISS is invalid in SYN-SENT,
- * because we have no previous data sent before SYN.
- * --ANK(990513)
- *
- * We do not send data with SYN, so that RFC-correct
- * test reduces to:
- */
- if (sk->zapped ||
- TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
- return 1;
-
- /* Now ACK is acceptable.
- *
- * "If the RST bit is set
- * If the ACK was acceptable then signal the user "error:
- * connection reset", drop the segment, enter CLOSED state,
- * delete TCB, and return."
- */
-
- if (th->rst) {
- tcp_reset(sk);
- goto discard;
- }
-
- /* rfc793:
- * "fifth, if neither of the SYN or RST bits is set then
- * drop the segment and return."
- *
- * See note below!
- * --ANK(990513)
- */
-
- if (!th->syn)
- goto discard;
-
- /* rfc793:
- * "If the SYN bit is on ...
- * are acceptable then ...
- * (our SYN has been ACKed), change the connection
- * state to ESTABLISHED..."
- *
- * Do you see? SYN-less ACKs in SYN-SENT state are
- * completely ignored.
- *
- * The bug causing stalled SYN-SENT sockets
- * was here: tcp_ack advanced snd_una and canceled
- * retransmit timer, so that bare ACK received
- * in SYN-SENT state (even with invalid ack==ISS,
- * because tcp_ack check is too weak for SYN-SENT)
- * causes moving socket to invalid semi-SYN-SENT,
- * semi-ESTABLISHED state and connection hangs.
- *
- * There exist buggy stacks, which really send
-			 * such ACKs: e.g. 202.226.91.94 (okigate.oki.co.jp).
- * Actually, if this host did not try to get something
- * from ftp.inr.ac.ru I'd never find this bug 8)
- *
- * --ANK (990514)
- */
-
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
- tcp_ack(sk,th, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->ack_seq, len);
-
- /* Ok.. it's good. Set up sequence numbers and
- * move to established.
- */
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1;
- tp->rcv_wup = TCP_SKB_CB(skb)->seq+1;
-
- /* RFC1323: The window in SYN & SYN/ACK segments is
- * never scaled.
- */
- tp->snd_wnd = htons(th->window);
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
- tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
- tp->fin_seq = TCP_SKB_CB(skb)->seq;
-
- tcp_set_state(sk, TCP_ESTABLISHED);
- tcp_parse_options(sk, th, tp, 0);
-
- if (tp->wscale_ok == 0) {
- tp->snd_wscale = tp->rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp,65535);
- }
-
- if (tp->tstamp_ok) {
- tp->tcp_header_len =
- sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
- } else
- tp->tcp_header_len = sizeof(struct tcphdr);
- if (tp->saw_tstamp) {
- tp->ts_recent = tp->rcv_tsval;
- tp->ts_recent_stamp = tcp_time_stamp;
- }
-
- /* Can't be earlier, doff would be wrong. */
- tcp_send_ack(sk);
-
- sk->dport = th->source;
- tp->copied_seq = tp->rcv_nxt;
-
- if(!sk->dead) {
- sk->state_change(sk);
- sock_wake_async(sk->socket, 0);
- }
- } else {
- if(th->syn && !th->rst) {
- /* The previous version of the code
- * checked for "connecting to self"
-			 * here. That check is done now in
- * tcp_connect.
- */
- tcp_set_state(sk, TCP_SYN_RECV);
- tcp_parse_options(sk, th, tp, 0);
- if (tp->saw_tstamp) {
- tp->ts_recent = tp->rcv_tsval;
- tp->ts_recent_stamp = tcp_time_stamp;
- }
-
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
-
- /* RFC1323: The window in SYN & SYN/ACK segments is
- * never scaled.
- */
- tp->snd_wnd = htons(th->window);
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
-
- tcp_send_synack(sk);
- } else
- break;
- }
-
- /* tp->tcp_header_len and tp->mss_clamp
- probably changed, synchronize mss.
- */
- tcp_sync_mss(sk, tp->pmtu_cookie);
- tp->rcv_mss = tp->mss_cache;
-
- if (sk->state == TCP_SYN_RECV)
- goto discard;
-
- goto step6;
- }
-
- /* Parse the tcp_options present on this header.
- * By this point we really only expect timestamps.
- * Note that this really has to be here and not later for PAWS
- * (RFC1323) to work.
- */
- if (tcp_fast_parse_options(sk, th, tp)) {
- /* NOTE: assumes saw_tstamp is never set if we didn't
- * negotiate the option. tcp_fast_parse_options() must
- * guarantee this.
- */
- if (tp->saw_tstamp) {
- if (tcp_paws_discard(tp, th, len)) {
- tcp_statistics.TcpInErrs++;
- if (!th->rst) {
- tcp_send_ack(sk);
- goto discard;
- }
- }
- tcp_replace_ts_recent(sk, tp,
- TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
- }
- }
-
- /* The silly FIN test here is necessary to see an advancing ACK in
- * retransmitted FIN frames properly. Consider the following sequence:
- *
- * host1 --> host2 FIN XSEQ:XSEQ(0) ack YSEQ
- * host2 --> host1 FIN YSEQ:YSEQ(0) ack XSEQ
- * host1 --> host2 XSEQ:XSEQ(0) ack YSEQ+1
- * host2 --> host1 FIN YSEQ:YSEQ(0) ack XSEQ+1 (fails tcp_sequence test)
- *
- * At this point the connection will deadlock with host1 believing
-	 * that its FIN is never ACK'd, and thus it will retransmit its FIN
- * forever. The following fix is from Taral (taral@taral.net).
- */
-
- /* step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq) &&
- !(th->fin && TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)) {
- if (!th->rst) {
- tcp_send_ack(sk);
- }
- goto discard;
- }
-
- /* step 2: check RST bit */
- if(th->rst) {
- tcp_reset(sk);
- goto discard;
- }
-
- /* step 3: check security and precedence [ignored] */
-
- /* step 4:
- *
- * Check for a SYN, and ensure it matches the SYN we were
- * first sent. We have to handle the rather unusual (but valid)
- * sequence that KA9Q derived products may generate of
- *
- * SYN
- * SYN|ACK Data
- * ACK (lost)
- * SYN|ACK Data + More Data
- * .. we must ACK not RST...
- *
- * We keep syn_seq as the sequence space occupied by the
- * original syn.
- */
-
- if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
- tcp_reset(sk);
- return 1;
- }
-
- /* step 5: check the ACK field */
- if (th->ack) {
- int acceptable = tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->ack_seq, len);
-
- switch(sk->state) {
- case TCP_SYN_RECV:
- if (acceptable) {
- tcp_set_state(sk, TCP_ESTABLISHED);
- sk->dport = th->source;
- tp->copied_seq = tp->rcv_nxt;
-
- if(!sk->dead)
- sk->state_change(sk);
-
- tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
- tp->snd_wnd = htons(th->window) << tp->snd_wscale;
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
- tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
-
- } else {
- SOCK_DEBUG(sk, "bad ack\n");
- return 1;
- }
- break;
-
- case TCP_FIN_WAIT1:
- if (tp->snd_una == tp->write_seq) {
- sk->shutdown |= SEND_SHUTDOWN;
- tcp_set_state(sk, TCP_FIN_WAIT2);
- if (!sk->dead)
- sk->state_change(sk);
- else
- tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
- }
- break;
-
- case TCP_CLOSING:
- if (tp->snd_una == tp->write_seq) {
- tcp_time_wait(sk);
- goto discard;
- }
- break;
-
- case TCP_LAST_ACK:
- if (tp->snd_una == tp->write_seq) {
- sk->shutdown = SHUTDOWN_MASK;
- tcp_set_state(sk,TCP_CLOSE);
- if (!sk->dead)
- sk->state_change(sk);
- goto discard;
- }
- break;
- }
- } else
- goto discard;
-
-step6:
- /* step 6: check the URG bit */
- tcp_urg(sk, th, len);
-
- /* step 7: process the segment text */
- switch (sk->state) {
- case TCP_CLOSE_WAIT:
- case TCP_CLOSING:
- if (!before(TCP_SKB_CB(skb)->seq, tp->fin_seq))
- break;
-
- case TCP_FIN_WAIT1:
- case TCP_FIN_WAIT2:
- /* RFC 793 says to queue data in these states,
- * RFC 1122 says we MUST send a reset.
- * BSD 4.4 also does reset.
- */
- if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) {
- if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- tcp_reset(sk);
- return 1;
- }
- }
-
- case TCP_ESTABLISHED:
- queued = tcp_data(skb, sk, len);
-
- /* This must be after tcp_data() does the skb_pull() to
- * remove the header size from skb->len.
- */
- tcp_measure_rcv_mss(sk, skb);
- break;
- }
-
- tcp_data_snd_check(sk);
- tcp_ack_snd_check(sk);
-
- if (!queued) {
-discard:
- kfree_skb(skb);
- }
- return 0;
-}
diff --git a/pfinet.old/linux-src/net/ipv4/tcp_output.c~ b/pfinet.old/linux-src/net/ipv4/tcp_output.c~
deleted file mode 100644
index df6d48f2..00000000
--- a/pfinet.old/linux-src/net/ipv4/tcp_output.c~
+++ /dev/null
@@ -1,1150 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * Implementation of the Transmission Control Protocol(TCP).
- *
- * Version: $Id: tcp_output.c,v 1.108.2.1 1999/05/14 23:07:36 davem Exp $
- *
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Mark Evans, <evansmp@uhura.aston.ac.uk>
- * Corey Minyard <wf-rch!minyard@relay.EU.net>
- * Florian La Roche, <flla@stud.uni-sb.de>
- * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
- * Linus Torvalds, <torvalds@cs.helsinki.fi>
- * Alan Cox, <gw4pts@gw4pts.ampr.org>
- * Matthew Dillon, <dillon@apollo.west.oic.com>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Jorge Cwik, <jorge@laser.satlink.net>
- */
-
-/*
- * Changes: Pedro Roque : Retransmit queue handled by TCP.
- * : Fragmentation on mtu decrease
- * : Segment collapse on retransmit
- * : AF independence
- *
- * Linus Torvalds : send_delayed_ack
- * David S. Miller : Charge memory using the right skb
- * during syn/ack processing.
- * David S. Miller : Output engine completely rewritten.
- * Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
- *
- */
-
-#include <net/tcp.h>
-
-extern int sysctl_tcp_timestamps;
-extern int sysctl_tcp_window_scaling;
-extern int sysctl_tcp_sack;
-
-/* People can turn this off for buggy TCPs found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
-
-/* Get rid of any delayed acks; we sent one already. */
-static __inline__ void clear_delayed_acks(struct sock * sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- tp->delayed_acks = 0;
- if(tcp_in_quickack_mode(tp))
- tcp_exit_quickack_mode(tp);
- tcp_clear_xmit_timer(sk, TIME_DACK);
-}
-
-static __inline__ void update_send_head(struct sock *sk)
-{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-
- tp->send_head = tp->send_head->next;
- if (tp->send_head == (struct sk_buff *) &sk->write_queue)
- tp->send_head = NULL;
-}
-
-/* This routine actually transmits TCP packets queued in by
- * tcp_do_sendmsg(). This is used by both the initial
- * transmission and possible later retransmissions.
- * All SKB's seen here are completely headerless. It is our
- * job to build the TCP header, and pass the packet down to
- * IP so it can do the same plus pass the packet off to the
- * device.
- *
- * We are working here with either a clone of the original
- * SKB, or a fresh unique copy made by the retransmit engine.
- */
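-/* For the option sizing below, a worked case (using the standard
- * aligned option lengths, TCPOLEN_MSS == 4, TCPOLEN_TSTAMP_ALIGNED == 12
- * and TCPOLEN_WSCALE_ALIGNED == 4): a SYN with timestamps, window
- * scaling and SACK all enabled carries 20 + 4 + 12 + 4 = 40 bytes of
- * header, the SACK-permitted option riding inside the aligned
- * timestamp block; without timestamps, SACKPERM costs its own 4
- * aligned bytes.
- */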
-void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
-{
- if(skb != NULL) {
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- int tcp_header_size = tp->tcp_header_len;
- struct tcphdr *th;
- int sysctl_flags;
-
-#define SYSCTL_FLAG_TSTAMPS 0x1
-#define SYSCTL_FLAG_WSCALE 0x2
-#define SYSCTL_FLAG_SACK 0x4
-
- sysctl_flags = 0;
- if(tcb->flags & TCPCB_FLAG_SYN) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
- if(sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
- if(sysctl_tcp_sack) {
- sysctl_flags |= SYSCTL_FLAG_SACK;
- if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
- }
- } else if(tp->sack_ok && tp->num_sacks) {
- /* A SACK is 2 pad bytes, a 2 byte header, plus
- * 2 32-bit sequence numbers for each SACK block.
- */
- tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
- }
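-		/* Concretely (using the standard values
-		 * TCPOLEN_SACK_BASE_ALIGNED == 4 and TCPOLEN_SACK_PERBLOCK
-		 * == 8): with timestamps negotiated the base header is
-		 * 20 + 12 = 32 bytes, so three SACK blocks add
-		 * 4 + 3*8 = 28 bytes for a 60 byte header, the TCP maximum;
-		 * that is why the receive side caps max_sacks at 3 when
-		 * tstamp_ok is set, and at 4 otherwise.
-		 */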
- th = (struct tcphdr *) skb_push(skb, tcp_header_size);
- skb->h.th = th;
- skb_set_owner_w(skb, sk);
-
- /* Build TCP header and checksum it. */
- th->source = sk->sport;
- th->dest = sk->dport;
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
- th->ack_seq = htonl(tp->rcv_nxt);
- th->doff = (tcp_header_size >> 2);
- th->res1 = 0;
- *(((__u8 *)th) + 13) = tcb->flags;
- if(!(tcb->flags & TCPCB_FLAG_SYN))
- th->window = htons(tcp_select_window(sk));
- th->check = 0;
- th->urg_ptr = ntohs(tcb->urg_ptr);
- if(tcb->flags & TCPCB_FLAG_SYN) {
- /* RFC1323: The window in SYN & SYN/ACK segments
- * is never scaled.
- */
- th->window = htons(tp->rcv_wnd);
- tcp_syn_build_options((__u32 *)(th + 1), tp->mss_clamp,
- (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
- (sysctl_flags & SYSCTL_FLAG_SACK),
- (sysctl_flags & SYSCTL_FLAG_WSCALE),
- tp->rcv_wscale,
- TCP_SKB_CB(skb)->when,
- tp->ts_recent);
- } else {
- tcp_build_and_update_options((__u32 *)(th + 1),
- tp, TCP_SKB_CB(skb)->when);
- }
- tp->af_specific->send_check(sk, th, skb->len, skb);
-
- clear_delayed_acks(sk);
- tp->last_ack_sent = tp->rcv_nxt;
- tcp_statistics.TcpOutSegs++;
- tp->af_specific->queue_xmit(skb);
- }
-#undef SYSCTL_FLAG_TSTAMPS
-#undef SYSCTL_FLAG_WSCALE
-#undef SYSCTL_FLAG_SACK
-}
-
-/* This is the main buffer sending routine. We queue the buffer
- * and decide whether to queue or transmit now.
- */
-void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Advance write_seq and place onto the write_queue. */
- tp->write_seq += (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq);
- __skb_queue_tail(&sk->write_queue, skb);
-
- if (!force_queue && tp->send_head == NULL && tcp_snd_test(sk, skb)) {
- /* Send it out now. */
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
- if(!tcp_timer_is_set(sk, TIME_RETRANS))
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- } else {
- /* Queue it, remembering where we must start sending. */
- if (tp->send_head == NULL)
- tp->send_head = skb;
- if (!force_queue && tp->packets_out == 0 && !tp->pending) {
- tp->pending = TIME_PROBE0;
- tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
- }
- }
-}
-
-/* Function to create two new TCP segments. Shrinks the given segment
- * to the specified size and appends a new segment with the rest of the
- * packet to the list. This won't be called frequently, I hope.
- * Remember, these are still headerless SKBs at this point.
- */
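-/* Sketch of the bookkeeping below: fragmenting a segment that covers
- * [seq, end_seq) at offset len leaves [seq, seq + len) in the original
- * skb and moves [seq + len, end_seq) into the new buff; PSH and FIN
- * always travel with the second half, while URG stays on whichever
- * half the urgent pointer falls in.
- */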
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
-{
- struct sk_buff *buff;
- int nsize = skb->len - len;
- u16 flags;
-
- /* Get a new skb... force flag on. */
- buff = sock_wmalloc(sk,
- (nsize + MAX_HEADER + sk->prot->max_header),
- 1, GFP_ATOMIC);
- if (buff == NULL)
- return -1; /* We'll just try again later. */
-
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_HEADER + sk->prot->max_header);
-
- /* Correct the sequence numbers. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
- TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
-
- /* PSH and FIN should only be set in the second packet. */
- flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
- if(flags & TCPCB_FLAG_URG) {
- u16 old_urg_ptr = TCP_SKB_CB(skb)->urg_ptr;
-
- /* Urgent data is always a pain in the ass. */
- if(old_urg_ptr > len) {
- TCP_SKB_CB(skb)->flags &= ~(TCPCB_FLAG_URG);
- TCP_SKB_CB(skb)->urg_ptr = 0;
- TCP_SKB_CB(buff)->urg_ptr = old_urg_ptr - len;
- } else {
- flags &= ~(TCPCB_FLAG_URG);
- }
- }
- if(!(flags & TCPCB_FLAG_URG))
- TCP_SKB_CB(buff)->urg_ptr = 0;
- TCP_SKB_CB(buff)->flags = flags;
- TCP_SKB_CB(buff)->sacked = 0;
-
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy(skb->data + len, skb_put(buff, nsize),
- nsize, 0);
-
- /* This takes care of the FIN sequence number too. */
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
- skb_trim(skb, len);
-
- /* Rechecksum original buffer. */
- skb->csum = csum_partial(skb->data, skb->len, 0);
-
-	/* Looks stupid, but our code really uses the 'when' field of
-	 * skbs which it has never sent before. --ANK
- */
- TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-
- /* Link BUFF into the send queue. */
- __skb_append(skb, buff);
-
- return 0;
-}
-
-/* This function synchronizes snd mss to the current pmtu/exthdr set.
-
-   tp->user_mss is the mss set by the user via TCP_MAXSEG. It does NOT
-   account for TCP options; it includes only the bare TCP header.
-
-   tp->mss_clamp is the mss negotiated at connection setup.
-   It is the minimum of user_mss and the mss received with the SYN.
-   It also does not include TCP options.
-
- tp->pmtu_cookie is last pmtu, seen by this function.
-
- tp->mss_cache is current effective sending mss, including
- all tcp options except for SACKs. It is evaluated,
- taking into account current pmtu, but never exceeds
- tp->mss_clamp.
-
- NOTE1. rfc1122 clearly states that advertised MSS
- DOES NOT include either tcp or ip options.
-
- NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
- this function. --ANK (980731)
- */
-
-int tcp_sync_mss(struct sock *sk, u32 pmtu)
-{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- int mss_now;
-
-	/* Calculate the base mss without TCP options:
-	   it is MMS_S - sizeof(tcphdr) in RFC 1122 terms.
-	 */
- mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
-
- /* Clamp it (mss_clamp does not include tcp options) */
- if (mss_now > tp->mss_clamp)
- mss_now = tp->mss_clamp;
-
- /* Now subtract TCP options size, not including SACKs */
- mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
-
- /* Now subtract optional transport overhead */
- mss_now -= tp->ext_header_len;
-
-	/* If we got a too small (or even negative) value,
-	   clamp it to 8 from below. Why 8?
-	   Well, it could just as well be 1,
-	   but if IP accepted a segment of length 1,
-	   it would love 8 even more 8) --ANK (980731)
-	 */
- if (mss_now < 8)
- mss_now = 8;
-
- /* And store cached results */
- tp->pmtu_cookie = pmtu;
- tp->mss_cache = mss_now;
- return mss_now;
-}
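
For concreteness, a worked example of the computation above. All numbers are assumed for illustration (IPv4 header of 20 bytes, bare TCP header of 20 bytes, RFC 1323 timestamps adding 12 aligned option bytes, no extension headers); none come from the deleted file:

#include <stdio.h>

int
main (void)
{
  int pmtu = 1500;                        /* assumed IPv4 path MTU */
  int net_header_len = 20;                /* IPv4 header */
  int tcphdr_len = 20;                    /* bare TCP header */
  int tcp_header_len = tcphdr_len + 12;   /* plus aligned timestamp option */
  int ext_header_len = 0;
  int mss_clamp = 65535;

  int mss_now = pmtu - net_header_len - tcphdr_len;  /* base mss: 1460 */
  if (mss_now > mss_clamp)
    mss_now = mss_clamp;
  mss_now -= tcp_header_len - tcphdr_len;            /* minus options: 1448 */
  mss_now -= ext_header_len;
  if (mss_now < 8)                                   /* the floor from above */
    mss_now = 8;

  printf ("mss_cache = %d\n", mss_now);              /* prints 1448 */
  return 0;
}
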
-
-
-/* This routine writes packets to the network. It advances the
- * send_head. This happens as incoming acks open up the remote
- * window for us.
- */
-void tcp_write_xmit(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- unsigned int mss_now;
-
-	/* Account for SACKs; we may need to fragment due to this.
- * It is just like the real MSS changing on us midstream.
- * We also handle things correctly when the user adds some
- * IP options mid-stream. Silly to do, but cover it.
- */
- mss_now = tcp_current_mss(sk);
-
- /* If we are zapped, the bytes will have to remain here.
- * In time closedown will empty the write queue and all
- * will be happy.
- */
- if(!sk->zapped) {
- struct sk_buff *skb;
- int sent_pkts = 0;
-
- /* Anything on the transmit queue that fits the window can
- * be added providing we are:
- *
- * a) following SWS avoidance [and Nagle algorithm]
- * b) not exceeding our congestion window.
- * c) not retransmitting [Nagle]
- */
- while((skb = tp->send_head) && tcp_snd_test(sk, skb)) {
- if (skb->len > mss_now) {
- if (tcp_fragment(sk, skb, mss_now))
- break;
- }
-
- /* Advance the send_head. This one is going out. */
- update_send_head(sk);
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
- sent_pkts = 1;
- }
-
- /* If we sent anything, make sure the retransmit
- * timer is active.
- */
- if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS))
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- }
-}
-
-/* This function returns the amount that we can raise the
- * usable window based on the following constraints
- *
- * 1. The window can never be shrunk once it is offered (RFC 793)
- * 2. We limit memory per socket
- *
- * RFC 1122:
- * "the suggested [SWS] avoidance algorithm for the receiver is to keep
- * RECV.NEXT + RCV.WIN fixed until:
- * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
- *
- * i.e. don't raise the right edge of the window until you can raise
- * it at least MSS bytes.
- *
- * Unfortunately, the recommended algorithm breaks header prediction,
- * since header prediction assumes th->window stays fixed.
- *
- * Strictly speaking, keeping th->window fixed violates the receiver
- * side SWS prevention criteria. The problem is that under this rule
- * a stream of single byte packets will cause the right side of the
- * window to always advance by a single byte.
- *
- * Of course, if the sender implements sender side SWS prevention
- * then this will not be a problem.
- *
- * BSD seems to make the following compromise:
- *
- *	If the free space is less than 1/4 of the maximum
- * space available and the free space is less than 1/2 mss,
- * then set the window to 0.
- * Otherwise, just prevent the window from shrinking
- * and from being larger than the largest representable value.
- *
- * This prevents incremental opening of the window in the regime
- * where TCP is limited by the speed of the reader side taking
- * data out of the TCP receive queue. It does nothing about
- * those cases where the window is constrained on the sender side
- * because the pipeline is full.
- *
- * BSD also seems to "accidentally" limit itself to windows that are a
- * multiple of MSS, at least until the free space gets quite small.
- * This would appear to be a side effect of the mbuf implementation.
- * Combining these two algorithms results in the observed behavior
- * of having a fixed window size at almost all times.
- *
- * Below we obtain similar behavior by forcing the offered window to
- * a multiple of the mss when it is feasible to do so.
- *
- * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
- */
-u32 __tcp_select_window(struct sock *sk)
-{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- unsigned int mss = tp->mss_cache;
- int free_space;
- u32 window;
-
- /* Sometimes free_space can be < 0. */
- free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2;
- if (tp->window_clamp) {
- if (free_space > ((int) tp->window_clamp))
- free_space = tp->window_clamp;
- mss = min(tp->window_clamp, mss);
- } else {
- printk("tcp_select_window: tp->window_clamp == 0.\n");
- }
-
- if (mss < 1) {
- mss = 1;
- printk("tcp_select_window: sk->mss fell to 0.\n");
- }
-
- if ((free_space < (sk->rcvbuf/4)) && (free_space < ((int) (mss/2)))) {
- window = 0;
- tp->pred_flags = 0;
- } else {
- /* Get the largest window that is a nice multiple of mss.
- * Window clamp already applied above.
- * If our current window offering is within 1 mss of the
- * free space we just keep it. This prevents the divide
- * and multiply from happening most of the time.
- * We also don't do any window rounding when the free space
- * is too small.
- */
- window = tp->rcv_wnd;
- if ((((int) window) <= (free_space - ((int) mss))) ||
- (((int) window) > free_space))
- window = (((unsigned int) free_space)/mss)*mss;
- }
- return window;
-}
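
A minimal model of just the rounding rule above (editorial sketch: the zero-window test is simplified to the mss half alone, and the numbers are invented):

#include <stdio.h>

/* Keep the current offer if it sits within one mss below free space
   (and not above it); otherwise round free space down to a multiple
   of mss, as the tail of __tcp_select_window() does. */
static unsigned int
select_window (int free_space, unsigned int mss, unsigned int rcv_wnd)
{
  if (free_space < (int) (mss / 2))    /* simplified zero-window test */
    return 0;
  if ((int) rcv_wnd <= free_space - (int) mss || (int) rcv_wnd > free_space)
    return ((unsigned int) free_space / mss) * mss;
  return rcv_wnd;                      /* close enough: skip divide/multiply */
}

int
main (void)
{
  printf ("%u\n", select_window (10000, 1460, 8760));  /* kept: 8760 */
  printf ("%u\n", select_window (20000, 1460, 8760));  /* rounded: 18980 */
  return 0;
}
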
-
-/* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
-{
- struct sk_buff *next_skb = skb->next;
-
- /* The first test we must make is that neither of these two
- * SKB's are still referenced by someone else.
- */
- if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
- int skb_size = skb->len, next_skb_size = next_skb->len;
- u16 flags = TCP_SKB_CB(skb)->flags;
-
- /* Punt if the first SKB has URG set. */
- if(flags & TCPCB_FLAG_URG)
- return;
-
- /* Also punt if next skb has been SACK'd. */
- if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
- return;
-
- /* Punt if not enough space exists in the first SKB for
- * the data in the second, or the total combined payload
- * would exceed the MSS.
- */
- if ((next_skb_size > skb_tailroom(skb)) ||
- ((skb_size + next_skb_size) > mss_now))
- return;
-
- /* Ok. We will be able to collapse the packet. */
- __skb_unlink(next_skb, next_skb->list);
-
- if(skb->len % 4) {
- /* Must copy and rechecksum all data. */
- memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
- skb->csum = csum_partial(skb->data, skb->len, 0);
- } else {
- /* Optimize, actually we could also combine next_skb->csum
- * to skb->csum using a single add w/carry operation too.
- */
- skb->csum = csum_partial_copy(next_skb->data,
- skb_put(skb, next_skb_size),
- next_skb_size, skb->csum);
- }
-
- /* Update sequence range on original skb. */
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
-
- /* Merge over control information. */
- flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
- if(flags & TCPCB_FLAG_URG) {
- u16 urgptr = TCP_SKB_CB(next_skb)->urg_ptr;
- TCP_SKB_CB(skb)->urg_ptr = urgptr + skb_size;
- }
- TCP_SKB_CB(skb)->flags = flags;
-
- /* All done, get rid of second SKB and account for it so
- * packet counting does not break.
- */
- kfree_skb(next_skb);
- sk->tp_pinfo.af_tcp.packets_out--;
- }
-}
-
-/* Do a simple retransmit without using the backoff mechanisms in
- * tcp_timer. This is used for path mtu discovery.
- * The socket is already locked here.
- */
-void tcp_simple_retransmit(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb, *old_next_skb;
- unsigned int mss = tcp_current_mss(sk);
-
- /* Don't muck with the congestion window here. */
- tp->dup_acks = 0;
- tp->high_seq = tp->snd_nxt;
- tp->retrans_head = NULL;
-
- /* Input control flow will see that this was retransmitted
- * and not use it for RTT calculation in the absence of
- * the timestamp option.
- */
- for (old_next_skb = skb = skb_peek(&sk->write_queue);
- ((skb != tp->send_head) &&
- (skb != (struct sk_buff *)&sk->write_queue));
- skb = skb->next) {
- int resend_skb = 0;
-
- /* Our goal is to push out the packets which we
- * sent already, but are being chopped up now to
- * account for the PMTU information we have.
- *
- * As we resend the queue, packets are fragmented
- * into two pieces, and when we try to send the
- * second piece it may be collapsed together with
- * a subsequent packet, and so on. -DaveM
- */
- if (old_next_skb != skb || skb->len > mss)
- resend_skb = 1;
- old_next_skb = skb->next;
- if (resend_skb != 0)
- tcp_retransmit_skb(sk, skb);
- }
-}
-
-static __inline__ void update_retrans_head(struct sock *sk)
-{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-
- tp->retrans_head = tp->retrans_head->next;
- if((tp->retrans_head == tp->send_head) ||
- (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
- tp->retrans_head = NULL;
- tp->rexmt_done = 1;
- }
-}
-
-/* This retransmits one SKB. Policy decisions and retransmit queue
- * state updates are done by the caller. Returns non-zero if an
- * error occurred which prevented the send.
- */
-int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- unsigned int cur_mss = tcp_current_mss(sk);
-
- if(skb->len > cur_mss) {
- if(tcp_fragment(sk, skb, cur_mss))
- return 1; /* We'll try again later. */
-
- /* New SKB created, account for it. */
- tp->packets_out++;
- }
-
- /* Collapse two adjacent packets if worthwhile and we can. */
- if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
- (skb->len < (cur_mss >> 1)) &&
- (skb->next != tp->send_head) &&
- (skb->next != (struct sk_buff *)&sk->write_queue) &&
- (sysctl_tcp_retrans_collapse != 0))
- tcp_retrans_try_collapse(sk, skb, cur_mss);
-
- if(tp->af_specific->rebuild_header(sk))
- return 1; /* Routing failure or similar. */
-
- /* Some Solaris stacks overoptimize and ignore the FIN on a
- * retransmit when old data is attached. So strip it off
- * since it is cheap to do so and saves bytes on the network.
- */
- if(skb->len > 0 &&
- (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
- TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
- skb_trim(skb, 0);
- skb->csum = 0;
- }
-
- /* Ok, we're gonna send it out, update state. */
- TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS;
- tp->retrans_out++;
-
- /* Make a copy, if the first transmission SKB clone we made
- * is still in somebody's hands, else make a clone.
- */
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if(skb_cloned(skb))
- skb = skb_copy(skb, GFP_ATOMIC);
- else
- skb = skb_clone(skb, GFP_ATOMIC);
-
- tcp_transmit_skb(sk, skb);
-
- /* Update global TCP statistics and return success. */
- sk->prot->retransmits++;
- tcp_statistics.TcpRetransSegs++;
-
- return 0;
-}
-
-/* This gets called after a retransmit timeout, and the initially
- * retransmitted data is acknowledged. It tries to continue
- * resending the rest of the retransmit queue, until either
- * we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessary retransmissions.
- */
-void tcp_xmit_retransmit_queue(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
-
- if (tp->retrans_head == NULL &&
- tp->rexmt_done == 0)
- tp->retrans_head = skb_peek(&sk->write_queue);
- if (tp->retrans_head == tp->send_head)
- tp->retrans_head = NULL;
-
- /* Each time, advance the retrans_head if we got
- * a packet out or we skipped one because it was
- * SACK'd. -DaveM
- */
- while ((skb = tp->retrans_head) != NULL) {
- /* If it has been ack'd by a SACK block, we don't
- * retransmit it.
- */
- if(!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
- /* Send it out, punt if error occurred. */
- if(tcp_retransmit_skb(sk, skb))
- break;
-
- update_retrans_head(sk);
-
- /* Stop retransmitting if we've hit the congestion
- * window limit.
- */
- if (tp->retrans_out >= tp->snd_cwnd)
- break;
- } else {
- update_retrans_head(sk);
- }
- }
-}
-
-/* Using FACK information, retransmit all missing frames at the receiver
- * up to the forward-most SACK'd packet (tp->fackets_out) if the packet
- * has not been retransmitted already.
- */
-void tcp_fack_retransmit(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb = skb_peek(&sk->write_queue);
- int packet_cnt = 0;
-
- while((skb != NULL) &&
- (skb != tp->send_head) &&
- (skb != (struct sk_buff *)&sk->write_queue)) {
- __u8 sacked = TCP_SKB_CB(skb)->sacked;
-
- if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS))
- goto next_packet;
-
- /* Ok, retransmit it. */
- if(tcp_retransmit_skb(sk, skb))
- break;
-
- if(tcp_packets_in_flight(tp) >= tp->snd_cwnd)
- break;
-next_packet:
- packet_cnt++;
- if(packet_cnt >= tp->fackets_out)
- break;
- skb = skb->next;
- }
-}
-
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
- */
-void tcp_send_fin(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb = skb_peek_tail(&sk->write_queue);
- unsigned int mss_now;
-
- /* Optimization, tack on the FIN if we have a queue of
- * unsent frames. But be careful about outgoing SACKS
- * and IP options.
- */
- mss_now = tcp_current_mss(sk);
-
- if((tp->send_head != NULL) && (skb->len < mss_now)) {
- /* tcp_write_xmit() takes care of the rest. */
- TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
- TCP_SKB_CB(skb)->end_seq++;
- tp->write_seq++;
-
- /* Special case to avoid Nagle bogosity. If this
- * segment is the last segment, and it was queued
- * due to Nagle/SWS-avoidance, send it out now.
- */
- if(tp->send_head == skb &&
- !sk->nonagle &&
- skb->len < (tp->mss_cache >> 1) &&
- tp->packets_out &&
- !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG)) {
- update_send_head(sk);
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
- if(!tcp_timer_is_set(sk, TIME_RETRANS))
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- }
- } else {
- /* Socket is locked, keep trying until memory is available. */
- do {
- skb = sock_wmalloc(sk,
- (MAX_HEADER +
- sk->prot->max_header),
- 1, GFP_KERNEL);
- } while (skb == NULL);
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->urg_ptr = 0;
-
- /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */
- TCP_SKB_CB(skb)->seq = tp->write_seq;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- tcp_send_skb(sk, skb, 0);
- }
-}
-
-/* We get here when a process closes a file descriptor (either due to
- * an explicit close() or as a byproduct of exit()'ing) and there
- * was unread data in the receive queue. This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
- */
-void tcp_send_active_reset(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
-
- /* NOTE: No TCP options attached and we never retransmit this. */
- skb = alloc_skb(MAX_HEADER + sk->prot->max_header, GFP_KERNEL);
- if (!skb)
- return;
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->urg_ptr = 0;
-
- /* Send it off. */
- TCP_SKB_CB(skb)->seq = tp->write_seq;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, skb);
-}
-
-/* WARNING: This routine must only be called when we have already sent
- * a SYN packet that crossed the incoming SYN that caused this routine
- * to get called. If this assumption fails then the initial rcv_wnd
- * and rcv_wscale values will not be correct.
- */
-int tcp_send_synack(struct sock *sk)
-{
- struct tcp_opt* tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff* skb;
-
- skb = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
- 1, GFP_ATOMIC);
- if (skb == NULL)
- return -ENOMEM;
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_SYN);
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->urg_ptr = 0;
-
- /* SYN eats a sequence byte. */
- TCP_SKB_CB(skb)->seq = tp->snd_una;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- __skb_queue_tail(&sk->write_queue, skb);
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
- return 0;
-}
-
-/*
- * Prepare a SYN-ACK.
- */
-struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
- struct open_request *req, int mss)
-{
- struct tcphdr *th;
- int tcp_header_size;
- struct sk_buff *skb;
-
- skb = sock_wmalloc(sk, MAX_HEADER + sk->prot->max_header, 1, GFP_ATOMIC);
- if (skb == NULL)
- return NULL;
-
- /* Reserve space for headers. */
- skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
-
- skb->dst = dst_clone(dst);
-
- /* Don't offer more than they did.
- * This way we don't have to memorize who said what.
- * FIXME: maybe this should be changed for better performance
- * with syncookies.
- */
- req->mss = min(mss, req->mss);
- if (req->mss < 8) {
- printk(KERN_DEBUG "initial req->mss below 8\n");
- req->mss = 8;
- }
-
- tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
- (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
- (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
- /* SACK_PERM is in the place of NOP NOP of TS */
- ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
- skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-
- memset(th, 0, sizeof(struct tcphdr));
- th->syn = 1;
- th->ack = 1;
- th->source = sk->sport;
- th->dest = req->rmt_port;
- TCP_SKB_CB(skb)->seq = req->snt_isn;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
- th->ack_seq = htonl(req->rcv_isn + 1);
- if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
- __u8 rcv_wscale;
- /* Set this up on the first call only */
- req->window_clamp = skb->dst->window;
- tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
- &req->rcv_wnd,
- &req->window_clamp,
- req->wscale_ok,
- &rcv_wscale);
- req->rcv_wscale = rcv_wscale;
- }
-
- /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
- th->window = htons(req->rcv_wnd);
-
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok,
- req->sack_ok, req->wscale_ok, req->rcv_wscale,
- TCP_SKB_CB(skb)->when,
- req->ts_recent);
-
- skb->csum = 0;
- th->doff = (tcp_header_size >> 2);
- tcp_statistics.TcpOutSegs++;
- return skb;
-}
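
A worked example of the header sizing above, using the option lengths these TCPOLEN_* macros have in Linux 2.2-era headers (assumed here, since the deleted hunk does not restate them): MSS 4 bytes, aligned timestamps 12, aligned window scale 4, aligned SACK-permitted 4.

#include <stdio.h>

int
main (void)
{
  int tstamp_ok = 1, wscale_ok = 1, sack_ok = 1;

  int tcp_header_size = 20 /* sizeof(struct tcphdr) */
    + 4                                  /* TCPOLEN_MSS */
    + (tstamp_ok ? 12 : 0)               /* TCPOLEN_TSTAMP_ALIGNED */
    + (wscale_ok ? 4 : 0)                /* TCPOLEN_WSCALE_ALIGNED */
    + ((sack_ok && !tstamp_ok) ? 4 : 0); /* TCPOLEN_SACKPERM_ALIGNED */

  /* With all options on: 40 bytes, so th->doff = 10. */
  printf ("tcp_header_size = %d, doff = %d\n",
          tcp_header_size, tcp_header_size >> 2);
  return 0;
}
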
-
-void tcp_connect(struct sock *sk, struct sk_buff *buff, int mtu)
-{
- struct dst_entry *dst = sk->dst_cache;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_HEADER + sk->prot->max_header);
-
- tp->snd_wnd = 0;
- tp->snd_wl1 = 0;
- tp->snd_wl2 = tp->write_seq;
- tp->snd_una = tp->write_seq;
- tp->rcv_nxt = 0;
-
- sk->err = 0;
-
- /* We'll fix this up when we get a response from the other end.
- * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
- */
- tp->tcp_header_len = sizeof(struct tcphdr) +
- (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
-
-	/* If the user gave us a TCP_MAXSEG, record it as the clamp. */
- if (tp->user_mss)
- tp->mss_clamp = tp->user_mss;
- tcp_sync_mss(sk, mtu);
-
-	/* Now the unpleasant part: if the initial pmtu is too low,
-	   set a lower clamp. I am not sure that this is good.
-	   To be more exact, I do not think that clamping to a value which
-	   is apparently transient and may improve in the future is a good idea.
-	   It would be better to wait until the peer returns its MSS
-	   (probably 65535 too) and meanwhile advertise something like 65535,
-	   or at least the first-hop device mtu. Is it clear what I mean?
-	   We should tell the peer the maximal mss we expect to RECEIVE;
-	   it has nothing to do with pmtu.
-	   I am afraid someone will be confused by such a huge value, though.
-	   --ANK (980731)
-	 */
- if (tp->mss_cache + tp->tcp_header_len - sizeof(struct tcphdr) < tp->mss_clamp )
- tp->mss_clamp = tp->mss_cache + tp->tcp_header_len - sizeof(struct tcphdr);
-
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
- TCP_SKB_CB(buff)->sacked = 0;
- TCP_SKB_CB(buff)->urg_ptr = 0;
- buff->csum = 0;
- TCP_SKB_CB(buff)->seq = tp->write_seq++;
- TCP_SKB_CB(buff)->end_seq = tp->write_seq;
- tp->snd_nxt = TCP_SKB_CB(buff)->end_seq;
-
- tp->window_clamp = dst->window;
- tcp_select_initial_window(sock_rspace(sk)/2,tp->mss_clamp,
- &tp->rcv_wnd,
- &tp->window_clamp,
- sysctl_tcp_window_scaling,
- &tp->rcv_wscale);
- /* Ok, now lock the socket before we make it visible to
- * the incoming packet engine.
- */
- lock_sock(sk);
-
- /* Socket identity change complete, no longer
- * in TCP_CLOSE, so enter ourselves into the
- * hash tables.
- */
- tcp_set_state(sk,TCP_SYN_SENT);
- sk->prot->hash(sk);
-
- tp->rto = dst->rtt;
- tcp_init_xmit_timers(sk);
- tp->retransmits = 0;
- tp->fackets_out = 0;
- tp->retrans_out = 0;
-
- /* Send it off. */
- __skb_queue_tail(&sk->write_queue, buff);
- TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
- tcp_statistics.TcpActiveOpens++;
-
- /* Timer for repeating the SYN until an answer. */
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
-
- /* Now, it is safe to release the socket. */
- release_sock(sk);
-}
-
-/* Send out a delayed ack, the caller does the policy checking
- * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
- * for details.
- */
-void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout)
-{
- unsigned long timeout;
-
- /* Stay within the limit we were given */
- timeout = tp->ato;
- if (timeout > max_timeout)
- timeout = max_timeout;
- timeout += jiffies;
-
-	/* Use the new timeout only if there wasn't an older one already. */
- if (!tp->delack_timer.prev) {
- tp->delack_timer.expires = timeout;
- add_timer(&tp->delack_timer);
- } else {
- if (time_before(timeout, tp->delack_timer.expires))
- mod_timer(&tp->delack_timer, timeout);
- }
-}
-
-/* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
-{
- char *str1 = "pfinet tcp_send_ack check point 1\n";
- char *str2 = "pfinet tcp_send_ack check point 2\n";
- int stderr_fd = fileno (stderr);
- /* If we have been reset, we may not send again. */
- if(!sk->zapped) {
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *buff;
-
- /* We are not putting this on the write queue, so
- * tcp_transmit_skb() will set the ownership to this
- * sock.
- */
- buff = alloc_skb(MAX_HEADER + sk->prot->max_header, GFP_ATOMIC);
- if (buff == NULL) {
- /* Force it to send an ack. We don't have to do this
- * (ACK is unreliable) but it's much better use of
- * bandwidth on slow links to send a spare ack than
- * resend packets.
- *
- * This is the one possible way that we can delay an
- * ACK and have tp->ato indicate that we are in
- * quick ack mode, so clear it.
- */
- if(tcp_in_quickack_mode(tp))
- tcp_exit_quickack_mode(tp);
- tcp_send_delayed_ack(tp, HZ/2);
- return;
- }
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(buff, MAX_HEADER + sk->prot->max_header);
- buff->csum = 0;
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(buff)->sacked = 0;
- TCP_SKB_CB(buff)->urg_ptr = 0;
-
- /* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tp->snd_nxt;
- TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		write (stderr_fd, str1, strlen (str1));
- fflush (stderr);
- tcp_transmit_skb(sk, buff);
-		write (stderr_fd, str2, strlen (str2));
- fflush (stderr);
- }
-}
-
-/* This routine sends a packet with an out of date sequence
- * number. It assumes the other end will try to ack it.
- */
-void tcp_write_wakeup(struct sock *sk)
-{
- /* After a valid reset we can send no more. */
- if (!sk->zapped) {
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
-
- /* Write data can still be transmitted/retransmitted in the
- * following states. If any other state is encountered, return.
- * [listen/close will never occur here anyway]
- */
- if ((1 << sk->state) &
- ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|
- TCPF_LAST_ACK|TCPF_CLOSING))
- return;
-
- if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) &&
- ((skb = tp->send_head) != NULL)) {
- unsigned long win_size;
-
-			/* We are probing the opening of a window,
-			 * but the window size is != 0; this must have been
-			 * a result of sender-side SWS avoidance.
-			 */
- win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
- if (win_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) {
- if (tcp_fragment(sk, skb, win_size))
- return; /* Let a retransmit get it. */
- }
- update_send_head(sk);
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->packets_out++;
- tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
- if (!tcp_timer_is_set(sk, TIME_RETRANS))
- tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- } else {
- /* We don't queue it, tcp_transmit_skb() sets ownership. */
- skb = alloc_skb(MAX_HEADER + sk->prot->max_header,
- GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- /* Reserve space for headers and set control bits. */
- skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->urg_ptr = 0;
-
- /* Use a previous sequence. This should cause the other
- * end to send an ack. Don't queue or clone SKB, just
- * send it.
- */
- TCP_SKB_CB(skb)->seq = tp->snd_nxt - 1;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, skb);
- }
- }
-}
-
-/* A window probe timeout has occurred. If the window is not closed, send
- * a partial packet, else a zero-window probe.
- */
-void tcp_send_probe0(struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- tcp_write_wakeup(sk);
- tp->pending = TIME_PROBE0;
- tp->backoff++;
- tp->probes_out++;
- tcp_reset_xmit_timer (sk, TIME_PROBE0,
- min(tp->rto << tp->backoff, 120*HZ));
-}
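
The min(tp->rto << tp->backoff, 120*HZ) above is exponential backoff with a hard cap. A standalone sketch of the resulting schedule, with an assumed HZ of 100 and an assumed base rto of 3*HZ (neither value comes from the deleted file):

#include <stdio.h>

int
main (void)
{
  int HZ = 100;           /* assumed clock tick rate */
  long rto = 3 * HZ;      /* assumed base retransmit timeout */
  int backoff;

  for (backoff = 0; backoff < 10; backoff++)
    {
      long next = rto << backoff;
      if (next > 120L * HZ)
        next = 120L * HZ;              /* the 120*HZ cap */
      printf ("probe after backoff %d: %ld ticks\n", backoff, next);
    }
  return 0;
}
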
diff --git a/pfinet.old/pfinet.patch~ b/pfinet.old/pfinet.patch~
deleted file mode 100644
index c5882899..00000000
--- a/pfinet.old/pfinet.patch~
+++ /dev/null
@@ -1,31 +0,0 @@
-Index: ethernet.c
-===================================================================
-RCS file: /sources/hurd/hurd/pfinet/ethernet.c,v
-retrieving revision 1.32
-diff -u -r1.32 ethernet.c
---- ethernet.c 9 Oct 2007 08:01:34 -0000 1.32
-+++ ethernet.c 29 Aug 2008 23:46:13 -0000
-@@ -68,15 +68,15 @@
- {
- }
-
--static short ether_filter[] =
-+/* The BPF instruction allows IP and ARP packets */
-+static struct bpf_insn ether_filter[] =
- {
--#ifdef NETF_IN
-- /* We have to tell the packet filtering code that we're interested in
-- incoming packets. */
-- NETF_IN, /* Header. */
--#endif
-- NETF_PUSHLIT | NETF_NOP,
-- 1
-+ {NETF_IN|NETF_BPF, /* Header. */ 0, 0, 0},
-+ {40, 0, 0, 12},
-+ {21, 1, 0, 2054},
-+ {21, 0, 1, 2048},
-+ {6, 0, 0, 1500},
-+ {6, 0, 0, 0}
- };
- static int ether_filter_len = sizeof (ether_filter) / sizeof (short);
-
diff --git a/pfinet.old/sched.c~ b/pfinet.old/sched.c~
deleted file mode 100644
index 4e67df6e..00000000
--- a/pfinet.old/sched.c~
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- Copyright (C) 1995,96,2000,02 Free Software Foundation, Inc.
-
- This file is part of the GNU Hurd.
-
- The GNU Hurd is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2, or (at
- your option) any later version.
-
- The GNU Hurd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-
-#include "pfinet.h"
-
-#include <asm/system.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-
-struct mutex global_lock = MUTEX_INITIALIZER;
-struct mutex net_bh_lock = MUTEX_INITIALIZER;
-struct condition net_bh_wakeup = CONDITION_INITIALIZER;
-
-struct task_struct current_contents; /* zeros are right default values */
-
-
-/* Wake up the owner of the SOCK. If HOW is zero, then just
- send SIGIO. If HOW is one, then send SIGIO only if the
- SO_WAITDATA flag is off. If HOW is two, then send SIGIO
- only if the SO_NOSPACE flag is on, and also clear it. */
-int
-sock_wake_async (struct socket *sock, int how)
-{
- /* For now, do nothing. XXX */
- return 0;
-}
-
-
-/* This function is the "net_bh worker thread".
- The packet receiver thread calls net/core/dev.c::netif_rx with a packet;
- netif_rx either drops the packet, or enqueues it and wakes us up
- via mark_bh which is really condition_broadcast on net_bh_wakeup.
- The packet receiver thread holds net_bh_lock while calling netif_rx.
- We wake up and take global_lock, which locks out RPC service threads.
-   We then also take net_bh_lock while running net_bh.
- Thus, only this thread running net_bh locks out the packet receiver
- thread (which takes only net_bh_lock while calling netif_rx), so packets
- are quickly moved from the Mach port's message queue to the `backlog'
- queue, or dropped, without synchronizing with RPC service threads.
- (The RPC service threads lock out the running of net_bh, but not
- the queuing/dropping of packets in netif_rx.) */
-any_t
-net_bh_worker (any_t arg)
-{
- char *str1 = "pfinet net_bh_worker before locking net_bh_lock";
- char *str2 = "pfinet net_bh_worker after locking net_bh_lock";
- char *str3 = "pfinet net_bh_worker after unlocking net_bh_lock";
- int stderr_fd = fileno (stderr);
- __mutex_lock (&global_lock);
- while (1)
- {
- condition_wait (&net_bh_wakeup, &global_lock);
-      write (stderr_fd, str1, strlen (str1));
- fflush (stderr);
- __mutex_lock (&net_bh_lock);
-      write (stderr_fd, str2, strlen (str2));
- fflush (stderr);
- net_bh ();
- __mutex_unlock (&net_bh_lock);
- write (stderr_fd, str3, strlen (str3) + 1);
- fflush (stderr);
- }
- /*NOTREACHED*/
- return 0;
-}
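
A runnable model of the locking protocol described above, rewritten with pthreads purely for illustration (the deleted file uses cthreads, and it waits on the condition with global_lock; here the condition is paired with the bh lock to keep the model simple and race-free):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bh_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t bh_wakeup = PTHREAD_COND_INITIALIZER;
static int backlog;

/* The producer takes only bh_lock, mirroring netif_rx under
   net_bh_lock, so it never contends with RPC service threads. */
static void *
packet_receiver (void *arg)
{
  pthread_mutex_lock (&bh_lock);
  backlog++;                            /* enqueue one packet */
  pthread_cond_broadcast (&bh_wakeup);  /* like mark_bh -> net_bh_wakeup */
  pthread_mutex_unlock (&bh_lock);
  return 0;
}

int
main (void)
{
  pthread_t t;
  pthread_create (&t, 0, packet_receiver, 0);

  pthread_mutex_lock (&global_lock);    /* lock out RPC service threads */
  pthread_mutex_lock (&bh_lock);        /* lock out the packet receiver */
  while (backlog == 0)
    pthread_cond_wait (&bh_wakeup, &bh_lock);
  printf ("net_bh: draining %d packet(s)\n", backlog);  /* run net_bh() here */
  backlog = 0;
  pthread_mutex_unlock (&bh_lock);
  pthread_mutex_unlock (&global_lock);

  pthread_join (t, 0);
  return 0;
}

Build with cc -pthread; the point is only the lock ordering, not the data structures.
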
diff --git a/pfinet.old/socket-ops.c~ b/pfinet.old/socket-ops.c~
deleted file mode 100644
index 726219b9..00000000
--- a/pfinet.old/socket-ops.c~
+++ /dev/null
@@ -1,546 +0,0 @@
-/* Interface functions for the socket.defs interface.
- Copyright (C) 1995,96,97,99,2000,02,07 Free Software Foundation, Inc.
- Written by Michael I. Bushnell, p/BSG.
-
- This file is part of the GNU Hurd.
-
- The GNU Hurd is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2, or (at
- your option) any later version.
-
- The GNU Hurd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-
-#include <sys/stat.h>
-#include <hurd/trivfs.h>
-#include <string.h>
-#include <stddef.h>
-#include <fcntl.h>
-#include <hurd/fshelp.h>
-
-#include "pfinet.h"
-#include "socket_S.h"
-
-#include <linux/sched.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <net/sock.h>
-
-
-error_t
-S_socket_create (struct trivfs_protid *master,
- int sock_type,
- int protocol,
- mach_port_t *port,
- mach_msg_type_name_t *porttype)
-{
- struct sock_user *user;
- struct socket *sock;
- error_t err;
- int isroot;
-
- if (!master)
- return EOPNOTSUPP;
-
- /* Don't allow bogus SOCK_PACKET here. */
-
- if ((sock_type != SOCK_STREAM
- && sock_type != SOCK_DGRAM
- && sock_type != SOCK_SEQPACKET
- && sock_type != SOCK_RAW)
- || protocol < 0)
- return EINVAL;
-
- __mutex_lock (&global_lock);
-
- become_task_protid (master);
-
- sock = sock_alloc ();
-
- sock->type = sock_type;
-
- isroot = master->isroot;
- if (!isroot)
- {
- struct stat st;
-
- /* XXX */
- st.st_uid = pfinet_owner;
- st.st_gid = pfinet_group;
-
- err = fshelp_isowner (&st, master->user);
- if (! err)
- isroot = 1;
- }
-
- if (master->pi.class == trivfs_protid_portclasses[PORTCLASS_INET])
- err = - (*net_families[PF_INET]->create) (sock, protocol);
- else
- err = - (*net_families[PF_INET6]->create) (sock, protocol);
-
- if (err)
- sock_release (sock);
- else
- {
- user = make_sock_user (sock, isroot, 0, 1);
- *port = ports_get_right (user);
- *porttype = MACH_MSG_TYPE_MAKE_SEND;
- ports_port_deref (user);
- }
-
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-
-/* Listen on a socket. */
-error_t
-S_socket_listen (struct sock_user *user, int queue_limit)
-{
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
- err = - (*user->sock->ops->listen) (user->sock, queue_limit);
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-error_t
-S_socket_accept (struct sock_user *user,
- mach_port_t *new_port,
- mach_msg_type_name_t *new_port_type,
- mach_port_t *addr_port,
- mach_msg_type_name_t *addr_port_type)
-{
- struct sock_user *newuser;
- struct socket *sock, *newsock;
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- sock = user->sock;
-
- __mutex_lock (&global_lock);
-
- become_task (user);
-
- newsock = sock_alloc ();
- if (!newsock)
- err = ENOMEM;
- else
- {
- newsock->type = sock->type;
-
- err = - (*sock->ops->dup) (newsock, sock);
- if (!err)
- err = - (*sock->ops->accept) (sock, newsock, sock->flags);
-
- if (!err)
- /* In Linux there is a race here with the socket closing before the
- ops->getname call we do in make_sockaddr_port. Since we still
- have the world locked, this shouldn't be an issue for us. */
- err = make_sockaddr_port (newsock, 1, addr_port, addr_port_type);
-
- if (!err)
- {
- newuser = make_sock_user (newsock, user->isroot, 0, 1);
- *new_port = ports_get_right (newuser);
- *new_port_type = MACH_MSG_TYPE_MAKE_SEND;
- ports_port_deref (newuser);
- }
-
- if (err)
- sock_release (newsock);
- }
-
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-error_t
-S_socket_connect (struct sock_user *user,
- struct sock_addr *addr)
-{
- struct socket *sock;
- error_t err;
-
- if (!user || !addr)
- return EOPNOTSUPP;
-
- sock = user->sock;
-
- __mutex_lock (&global_lock);
-
- become_task (user);
-
- err = - (*sock->ops->connect) (sock, &addr->address, addr->address.sa_len,
- sock->flags);
-
- __mutex_unlock (&global_lock);
-
- /* MiG should do this for us, but it doesn't. */
- if (!err)
- mach_port_deallocate (mach_task_self (), addr->pi.port_right);
-
- return err;
-}
-
-error_t
-S_socket_bind (struct sock_user *user,
- struct sock_addr *addr)
-{
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
- if (! addr)
- return EADDRNOTAVAIL;
-
- __mutex_lock (&global_lock);
- become_task (user);
- err = - (*user->sock->ops->bind) (user->sock,
- &addr->address, addr->address.sa_len);
- __mutex_unlock (&global_lock);
-
- /* MiG should do this for us, but it doesn't. */
- if (!err)
- mach_port_deallocate (mach_task_self (), addr->pi.port_right);
-
- return err;
-}
-
-error_t
-S_socket_name (struct sock_user *user,
- mach_port_t *addr_port,
- mach_msg_type_name_t *addr_port_name)
-{
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
- make_sockaddr_port (user->sock, 0, addr_port, addr_port_name);
- __mutex_unlock (&global_lock);
- return 0;
-}
-
-error_t
-S_socket_peername (struct sock_user *user,
- mach_port_t *addr_port,
- mach_msg_type_name_t *addr_port_name)
-{
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
- err = make_sockaddr_port (user->sock, 1, addr_port, addr_port_name);
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-error_t
-S_socket_connect2 (struct sock_user *user1,
- struct sock_user *user2)
-{
- error_t err;
-
- if (!user1 || !user2)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
-
- become_task (user1);
-
- if (user1->sock->type != user2->sock->type)
- err = EINVAL;
- else if (user1->sock->state != SS_UNCONNECTED
- && user2->sock->state != SS_UNCONNECTED)
- err = EISCONN;
- else
- err = - (*user1->sock->ops->socketpair) (user1->sock, user2->sock);
-
- __mutex_unlock (&global_lock);
-
- /* MiG should do this for us, but it doesn't. */
- if (!err)
- mach_port_deallocate (mach_task_self (), user2->pi.port_right);
-
- return err;
-}
-
-error_t
-S_socket_create_address (mach_port_t server,
- int sockaddr_type,
- char *data,
- mach_msg_type_number_t data_len,
- mach_port_t *addr_port,
- mach_msg_type_name_t *addr_port_type)
-{
- error_t err;
- struct sock_addr *addrstruct;
- const struct sockaddr *const sa = (void *) data;
-
- if (sockaddr_type != AF_INET && sockaddr_type != AF_INET6)
- return EAFNOSUPPORT;
- if (sa->sa_family != sockaddr_type
- || data_len < offsetof (struct sockaddr, sa_data))
- return EINVAL;
-
- err = ports_create_port (addrport_class, pfinet_bucket,
- (offsetof (struct sock_addr, address)
- + data_len), &addrstruct);
- if (err)
- return err;
-
- memcpy (&addrstruct->address, data, data_len);
-
- /* BSD does not require incoming sa_len to be set, so we don't either. */
- addrstruct->address.sa_len = data_len;
-
- *addr_port = ports_get_right (addrstruct);
- *addr_port_type = MACH_MSG_TYPE_MAKE_SEND;
- ports_port_deref (addrstruct);
- return 0;
-}
-
-error_t
-S_socket_fabricate_address (mach_port_t server,
- int sockaddr_type,
- mach_port_t *addr_port,
- mach_msg_type_name_t *addr_port_type)
-{
- return EOPNOTSUPP;
-}
-
-error_t
-S_socket_whatis_address (struct sock_addr *addr,
- int *type,
- char **data,
- mach_msg_type_number_t *datalen)
-{
- if (!addr)
- return EOPNOTSUPP;
-
- *type = addr->address.sa_family;
- if (*datalen < addr->address.sa_len)
- *data = mmap (0, addr->address.sa_len,
- PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
- *datalen = addr->address.sa_len;
- memcpy (*data, &addr->address, addr->address.sa_len);
-
- return 0;
-}
-
-error_t
-S_socket_shutdown (struct sock_user *user,
- int direction)
-{
- error_t err;
-
- if (!user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
- err = - (*user->sock->ops->shutdown) (user->sock, direction);
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-error_t
-S_socket_getopt (struct sock_user *user,
- int level,
- int option,
- char **data,
- size_t *datalen)
-{
- error_t err;
-
- if (! user)
- return EOPNOTSUPP;
-
- __mutex_lock (&global_lock);
- become_task (user);
-
- int len = *datalen;
- err = - (level == SOL_SOCKET ? sock_getsockopt
- : *user->sock->ops->getsockopt)
- (user->sock, level, option, *data, &len);
- *datalen = len;
-
- __mutex_unlock (&global_lock);
-
- /* XXX option data not properly typed, needs byte-swapping for netmsgserver.
- Most options are ints, some like IP_OPTIONS are bytesex-neutral. */
-
- return err;
-}
-
-error_t
-S_socket_setopt (struct sock_user *user,
- int level,
- int option,
- char *data,
- size_t datalen)
-{
- error_t err;
-
- if (! user)
- return EOPNOTSUPP;
-
- /* XXX option data not properly typed, needs byte-swapping for netmsgserver.
- Most options are ints, some like IP_OPTIONS are bytesex-neutral. */
-
- __mutex_lock (&global_lock);
- become_task (user);
-
- err = - (level == SOL_SOCKET ? sock_setsockopt
- : *user->sock->ops->setsockopt)
- (user->sock, level, option, data, datalen);
-
- __mutex_unlock (&global_lock);
-
- return err;
-}
-
-error_t
-S_socket_send (struct sock_user *user,
- struct sock_addr *addr,
- int flags,
- char *data,
- size_t datalen,
- mach_port_t *ports,
- size_t nports,
- char *control,
- size_t controllen,
- mach_msg_type_number_t *amount)
-{
- int sent;
- struct iovec iov = { data, datalen };
- struct msghdr m = { msg_name: addr ? &addr->address : 0,
- msg_namelen: addr ? addr->address.sa_len : 0,
- msg_flags: flags,
- msg_controllen: 0, msg_iov: &iov, msg_iovlen: 1 };
- char *str1 = "pfinet socket_send check point 1.\n";
- int stderr_fd = fileno (stderr);
-
- if (!user)
- return EOPNOTSUPP;
-
- /* Don't do this yet, it's too bizarre to think about right now. */
- if (nports != 0 || controllen != 0)
- return EINVAL;
-
- fprintf (stderr, "pfinet socket_send before locking global_lock.\n");
- fflush (stderr);
-
- __mutex_lock (&global_lock);
-  write (stderr_fd, str1, strlen (str1));
- fflush (stderr);
- become_task (user);
- if (user->sock->flags & O_NONBLOCK)
- m.msg_flags |= MSG_DONTWAIT;
- sent = (*user->sock->ops->sendmsg) (user->sock, &m, datalen, 0);
- __mutex_unlock (&global_lock);
-
- fprintf (stderr, "pfinet socket_send after unlocking global_lock.\n");
- fflush (stderr);
-
- /* MiG should do this for us, but it doesn't. */
- if (addr && sent >= 0)
- mach_port_deallocate (mach_task_self (), addr->pi.port_right);
-
- if (sent >= 0)
- {
- *amount = sent;
- return 0;
- }
- else
- return (error_t)-sent;
-}
-
-error_t
-S_socket_recv (struct sock_user *user,
- mach_port_t *addrport,
- mach_msg_type_name_t *addrporttype,
- int flags,
- char **data,
- size_t *datalen,
- mach_port_t **ports,
- mach_msg_type_name_t *portstype,
- size_t *nports,
- char **control,
- size_t *controllen,
- int *outflags,
- mach_msg_type_number_t amount)
-{
- error_t err;
- union { struct sockaddr_storage storage; struct sockaddr sa; } addr;
- int alloced = 0;
- struct iovec iov;
- struct msghdr m = { msg_name: &addr.sa, msg_namelen: sizeof addr,
- msg_controllen: 0, msg_iov: &iov, msg_iovlen: 1 };
-
- if (!user)
- return EOPNOTSUPP;
-
-  /* Instead of this, we should peek at the socket and only
-     allocate as much as necessary. */
- if (amount > *datalen)
- {
- *data = mmap (0, amount, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
- alloced = 1;
- }
-
- iov.iov_base = *data;
- iov.iov_len = amount;
-
- __mutex_lock (&global_lock);
- become_task (user);
- if (user->sock->flags & O_NONBLOCK)
- flags |= MSG_DONTWAIT;
- err = (*user->sock->ops->recvmsg) (user->sock, &m, amount, flags, 0);
- __mutex_unlock (&global_lock);
-
- if (err < 0)
- err = -err;
- else
- {
- *datalen = err;
- if (alloced && round_page (*datalen) < round_page (amount))
- munmap (*data + round_page (*datalen),
- round_page (amount) - round_page (*datalen));
- err = S_socket_create_address (0, addr.sa.sa_family,
- (void *) &addr.sa, m.msg_namelen,
- addrport, addrporttype);
- if (err && alloced)
- munmap (*data, *datalen);
-
- *outflags = m.msg_flags;
- *nports = 0;
- *portstype = MACH_MSG_TYPE_COPY_SEND;
- *controllen = 0;
- }
-
- return err;
-}
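
These handlers implement the socket.defs RPCs that glibc's BSD socket functions are built on under the Hurd. As a hedged illustration, an ordinary POSIX client whose socket() and sendto() calls would be served by S_socket_create, S_socket_create_address, and S_socket_send above (the destination address and port are invented):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int
main (void)
{
  /* socket() -> S_socket_create on the PF_INET control port. */
  int fd = socket (AF_INET, SOCK_DGRAM, 0);
  if (fd < 0)
    {
      perror ("socket");
      return 1;
    }

  struct sockaddr_in dst;
  memset (&dst, 0, sizeof dst);
  dst.sin_family = AF_INET;
  dst.sin_port = htons (7);                       /* echo, for illustration */
  dst.sin_addr.s_addr = htonl (INADDR_LOOPBACK);

  /* sendto() -> S_socket_create_address + S_socket_send. */
  if (sendto (fd, "ping", 4, 0,
              (struct sockaddr *) &dst, sizeof dst) < 0)
    perror ("sendto");

  close (fd);
  return 0;
}
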
diff --git a/pfinet.old/tmp.patch~ b/pfinet.old/tmp.patch~
deleted file mode 100644
index e293857a..00000000
--- a/pfinet.old/tmp.patch~
+++ /dev/null
@@ -1,146 +0,0 @@
-? addrconf.d
-? af_inet.d
-? af_inet6.d
-? arp.d
-? checksum.d
-? datagram.d
-? datagram_ipv6.d
-? dev.d
-? dev_mcast.d
-? devinet.d
-? dst.d
-? dummy.d
-? eth.d
-? ethernet.d
-? exthdrs.d
-? fib_frontend.d
-? fib_hash.d
-? fib_semantics.d
-? icmp.d
-? icmpv6.d
-? igmp.d
-? iioctl-ops.d
-? iioctl.sdefs.d
-? iioctlServer.c
-? iioctlServer.d
-? iioctl_S.h
-? io-ops.d
-? io.sdefs.d
-? ioServer.c
-? ioServer.d
-? io_S.h
-? iovec.d
-? ip6_fib.d
-? ip6_flowlabel.d
-? ip6_input.d
-? ip6_output.d
-? ip_forward.d
-? ip_fragment.d
-? ip_input.d
-? ip_options.d
-? ip_output.d
-? ip_sockglue.d
-? ipv6_sockglue.d
-? kmem_cache.d
-? loopback.d
-? main.d
-? mcast.d
-? misc.d
-? ndisc.d
-? neighbour.d
-? old-checksum.d
-? options.d
-? pfinet
-? pfinet-ops.d
-? pfinet.prof_d
-? pfinet.sdefs.d
-? pfinetServer.c
-? pfinetServer.d
-? pfinet_S.h
-? protocol.d
-? protocol_ipv6.d
-? raw.d
-? raw_ipv6.d
-? reassembly.d
-? route.d
-? route_ipv6.d
-? sched.d
-? skbuff.d
-? sock.d
-? socket-ops.d
-? socket.d
-? socket.sdefs.d
-? socketServer.c
-? socketServer.d
-? socket_S.h
-? startup_notify.sdefs.d
-? startup_notifyServer.c
-? startup_notifyServer.d
-? startup_notify_S.h
-? stubs.d
-? syncookies.d
-? sysctl_net_ipv4.d
-? tcp.d
-? tcp_input.d
-? tcp_ipv4.d
-? tcp_ipv6.d
-? tcp_output.d
-? tcp_timer.d
-? time.d
-? timer-emul.d
-? timer.d
-? tmp.patch
-? tunnel.d
-? udp.d
-? udp_ipv6.d
-? utils.d
-? asm/checksum.h
-Index: ethernet.c
-===================================================================
-RCS file: /sources/hurd/hurd/pfinet/ethernet.c,v
-retrieving revision 1.32
-diff -r1.32 ethernet.c
-28a29,30
-> #define _HACK_ERRNO_H
-> #include <errno.h>
-33a36
-> #include <device/bpf.h>
-71c74,75
-< static short ether_filter[] =
----
-> /* The BPF instruction allows IP and ARP packets */
-> static struct bpf_insn ether_filter[] =
-73,79c77,82
-< #ifdef NETF_IN
-< /* We have to tell the packet filtering code that we're interested in
-< incoming packets. */
-< NETF_IN, /* Header. */
-< #endif
-< NETF_PUSHLIT | NETF_NOP,
-< 1
----
-> {NETF_IN|NETF_BPF, /* Header. */ 0, 0, 0},
-> {40, 0, 0, 12},
-> {21, 1, 0, 2054},
-> {21, 0, 1, 2048},
-> {6, 0, 0, 1500},
-> {6, 0, 0, 0}
-98a102
-> static int count = 0;
-118a123,125
-> // fprintf (stderr, "pfinet receives the %dst packet.\n", ++count);
-> // fflush (stderr);
->
-169,171c176,179
-< err = get_privileged_ports (0, &master_device);
-< if (err)
-< error (2, err, "cannot get device master port");
----
-> /* The device name here is the path of a device file. */
-> master_device = file_name_lookup (dev->name, 0, 0);
-> if (master_device == MACH_PORT_NULL)
-> error (2, errno, "file_name_lookup %s", dev->name);
-173c181
-< err = device_open (master_device, D_WRITE | D_READ, dev->name, &edev->ether_port);
----
-> err = device_open (master_device, D_WRITE | D_READ, "eth", &edev->ether_port);
diff --git a/pfinet.old/tunnel.c~ b/pfinet.old/tunnel.c~
deleted file mode 100644
index c4f95804..00000000
--- a/pfinet.old/tunnel.c~
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- Copyright (C) 1995,96,98,99,2000,02 Free Software Foundation, Inc.
- Written by Michael I. Bushnell, p/BSG.
-
- This file is part of the GNU Hurd.
-
- The GNU Hurd is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2, or (at
- your option) any later version.
-
- The GNU Hurd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-
-#include "pfinet.h"
-
-#include <hurd.h>
-#include <cthreads.h>
-#include <fcntl.h>
-#include <device/device.h>
-#include <device/net_status.h>
-#include <netinet/in.h>
-#include <string.h>
-#include <error.h>
-#include <errno.h>
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
-
-struct port_class *tunnel_cntlclass;
-struct port_class *tunnel_class;
-
-struct tunnel_device
-{
- struct tunnel_device *next;
- struct trivfs_control *cntl;
- char *devname;
- file_t underlying;
- struct iouser *user;
- struct sk_buff_head xq; /* Transmit queue. */
- struct condition wait; /* For read and select. */
- struct condition select_alert; /* For read and select. */
- struct mutex lock; /* For read and select. */
- int read_blocked; /* For read and select. */
- struct device dev;
- struct net_device_stats stats;
-};
-
-
-/* Linked list of all tunnel devices. */
-struct tunnel_device *tunnel_dev;
-
-
-struct net_device_stats *
-tunnel_get_stats (struct device *dev)
-{
- struct tunnel_device *tdev = (struct tunnel_device *) dev->priv;
-
- assert (tdev);
-
- return &tdev->stats;
-}
-
-int
-tunnel_stop (struct device *dev)
-{
- struct tunnel_device *tdev = (struct tunnel_device *) dev->priv;
- struct sk_buff *skb;
-
- assert (tdev);
-
- while ((skb = skb_dequeue (&tdev->xq)) != 0)
- dev_kfree_skb(skb);
-
- /* Call those only if removing the device completely. */
- /* free (tdev->devname); */
- /* XXX??? mach_port_deallocate (mach_task_self, tdev->underlying) */
- return 0;
-}
-
-void
-tunnel_set_multi (struct device *dev)
-{
-}
-
-void
-tunnel_initialize (void)
-{
-}
-
-int
-tunnel_open (struct device *dev)
-{
- struct tunnel_device *tdev = (struct tunnel_device *) dev->priv;
-
- assert (tdev);
-
- skb_queue_head_init(&tdev->xq);
-
- return 0;
-}
-
-/* Transmit a frame by queuing it for the tunnel's reader. */
-int
-tunnel_xmit (struct sk_buff *skb, struct device *dev)
-{
- struct tunnel_device *tdev = (struct tunnel_device *) dev->priv;
-
- assert (tdev);
-
- __mutex_lock (&tdev->lock);
-
- /* Avoid unlimited growth. */
- if (skb_queue_len(&tdev->xq) > 128)
- {
- struct sk_buff *skb;
-
- skb = skb_dequeue(&tdev->xq);
- dev_kfree_skb(skb);
- }
-
- /* Queue it for processing. */
- skb_queue_tail(&tdev->xq, skb);
-
- if (tdev->read_blocked)
- {
- tdev->read_blocked = 0;
- condition_broadcast (&tdev->wait);
- }
-
- __mutex_unlock (&tdev->lock);
-
- return 0;
-}
-
-void
-setup_tunnel_device (char *name, struct device **device)
-{
- error_t err;
- struct tunnel_device *tdev;
- struct device *dev;
-
- /* Do global initialization before setting up first tunnel device. */
- if (!tunnel_dev)
- {
- trivfs_add_control_port_class (&tunnel_cntlclass);
- trivfs_add_protid_port_class (&tunnel_class);
- }
-
- tdev = calloc (1, sizeof (struct tunnel_device));
- if (!tdev)
- error (2, ENOMEM, "%s", name);
- tdev->next = tunnel_dev;
- tunnel_dev = tdev;
-
- *device = dev = &tdev->dev;
-
- dev->name = strdup (name);
-
- dev->priv = tdev;
- dev->get_stats = tunnel_get_stats;
-
- /* Functions. These ones are the true "hardware layer" in Linux. */
- dev->open = tunnel_open;
- dev->stop = tunnel_stop;
- dev->hard_start_xmit = tunnel_xmit;
- dev->set_multicast_list = tunnel_set_multi;
-
- /* These are the ones set by drivers/net/ppp_generic.c::ppp_net_init. */
- dev->hard_header = 0;
- dev->hard_header_len = 0;
- dev->mtu = PPP_MTU;
- dev->addr_len = 0;
- dev->tx_queue_len = 3;
- dev->type = ARPHRD_PPP;
- dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
-
- dev_init_buffers (dev);
-
- /* Setting up the translator at /dev/tunX. */
- asprintf (&tdev->devname, "/dev/%s", tdev->dev.name);
- tdev->underlying = file_name_lookup (tdev->devname, O_CREAT|O_NOTRANS, 0664);
-
- if (tdev->underlying == MACH_PORT_NULL)
- error (2, /* XXX */ 1, "%s", tdev->dev.name);
-
- err = trivfs_create_control (tdev->underlying, tunnel_cntlclass,
- pfinet_bucket, tunnel_class, pfinet_bucket,
- &tdev->cntl);
- tdev->cntl->hook = tdev;
-
- if (! err)
- {
- mach_port_t right = ports_get_send_right (tdev->cntl);
- err = file_set_translator (tdev->underlying, 0, FS_TRANS_EXCL
- | FS_TRANS_SET, 0, 0, 0, right,
- MACH_MSG_TYPE_COPY_SEND);
- mach_port_deallocate (mach_task_self (), right);
- }
-
- if (err)
- error (2, err, "%s", tdev->dev.name);
-
- __mutex_init (&tdev->lock);
- condition_init (&tdev->wait);
- condition_init (&tdev->select_alert);
- condition_implies (&tdev->wait, &tdev->select_alert);
-
- /* This call adds the device to the `dev_base' chain,
- initializes its `ifindex' member (which matters!),
- and tells the protocol stacks about the device. */
- err = - register_netdevice (dev);
- assert_perror (err);
-}
-
-/* If a new open with read and/or write permissions is requested,
- restrict to exclusive usage. */
-static error_t
-check_open_hook (struct trivfs_control *cntl,
- struct iouser *user,
- int flags)
-{
- struct tunnel_device *tdev;
-
- for (tdev = tunnel_dev; tdev; tdev = tdev->next)
- if (tdev->cntl == cntl)
- break;
-
- if (tdev && flags != O_NORW)
- {
- if (tdev->user)
- return EBUSY;
- else
- tdev->user = user;
- }
- return 0;
-}
-
-/* When a protid is destroyed, check if it is the current user.
- If yes, release the interface for other users. */
-static void
-pi_destroy_hook (struct trivfs_protid *cred)
-{
- struct tunnel_device *tdev;
-
- if (cred->pi.class != tunnel_class)
- return;
-
- tdev = (struct tunnel_device *) cred->po->cntl->hook;
-
- if (tdev->user == cred->user)
- tdev->user = 0;
-}
-
-/* If this variable is set, it is called every time a new peropen
- structure is created and initialized. */
-error_t (*trivfs_check_open_hook)(struct trivfs_control *,
- struct iouser *, int)
- = check_open_hook;
-
-/* If this variable is set, it is called every time a protid structure
- is about to be destroyed. */
-void (*trivfs_protid_destroy_hook) (struct trivfs_protid *) = pi_destroy_hook;
-
-/* Read data from an IO object. If offset is -1, read from the object
- maintained file pointer. If the object is not seekable, offset is
- ignored. The amount desired to be read is in AMOUNT. */
-error_t
-trivfs_S_io_read (struct trivfs_protid *cred,
- mach_port_t reply, mach_msg_type_name_t reply_type,
- char **data, mach_msg_type_number_t *data_len,
- loff_t offs, size_t amount)
-{
- struct tunnel_device *tdev;
- struct sk_buff *skb;
-
- /* Deny access if they have bad credentials. */
- if (! cred)
- return EOPNOTSUPP;
- else if (! (cred->po->openmodes & O_READ))
- return EBADF;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- tdev = (struct tunnel_device *) cred->po->cntl->hook;
-
- __mutex_lock (&tdev->lock);
-
- while (skb_queue_len(&tdev->xq) == 0)
- {
- if (cred->po->openmodes & O_NONBLOCK)
- {
- __mutex_unlock (&tdev->lock);
- return EWOULDBLOCK;
- }
-
- tdev->read_blocked = 1;
- if (hurd_condition_wait (&tdev->wait, &tdev->lock))
- {
- __mutex_unlock (&tdev->lock);
- return EINTR;
- }
- /* See term/users.c for possible race? */
- }
-
- skb = skb_dequeue (&tdev->xq);
- assert(skb);
-
- if (skb->len < amount)
- amount = skb->len;
- if (amount > 0)
- {
- /* Possibly allocate a new buffer. */
- if (*data_len < amount)
- {
- *data = mmap (0, amount, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
- if (*data == MAP_FAILED)
- {
- dev_kfree_skb (skb);
- __mutex_unlock (&tdev->lock);
- return ENOMEM;
- }
- }
-
- /* Copy the constant data into the buffer. */
- memcpy ((char *) *data, skb->data, amount);
- }
- *data_len = amount;
- dev_kfree_skb (skb);
-
- /* Set atime, see term/users.c */
-
- __mutex_unlock (&tdev->lock);
-
- return 0;
-}
-
-/* Write data to an IO object. If offset is -1, write at the object
- maintained file pointer. If the object is not seekable, offset is
- ignored. The amount successfully written is returned in amount. A
- given user should not have more than one outstanding io_write on an
- object at a time; servers implement congestion control by delaying
- responses to io_write. Servers may drop data (returning ENOBUFS)
- if they receive more than one write when not prepared for it. */
-error_t
-trivfs_S_io_write (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t replytype,
- char *data,
- mach_msg_type_number_t datalen,
- off_t offset,
- mach_msg_type_number_t *amount)
-{
- struct tunnel_device *tdev;
- struct sk_buff *skb;
-
- /* Deny access if they have bad credentials. */
- if (! cred)
- return EOPNOTSUPP;
- else if (! (cred->po->openmodes & O_WRITE))
- return EBADF;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- tdev = (struct tunnel_device *) cred->po->cntl->hook;
-
- __mutex_lock (&tdev->lock);
-
-  __mutex_lock (&net_bh_lock);
-  skb = alloc_skb (datalen, GFP_ATOMIC);
-  if (! skb)
-    {
-      /* The atomic allocation failed; drop the data, as the contract
-         above permits.  */
-      __mutex_unlock (&net_bh_lock);
-      __mutex_unlock (&tdev->lock);
-      return ENOBUFS;
-    }
-  skb->len = datalen;
-  skb->dev = &tdev->dev;
-
-  memcpy (skb->data, data, datalen);
-
-  /* Hand the packet to the input side of the stack. */
-  skb->mac.raw = skb->data;
-  skb->protocol = htons (ETH_P_IP);
-  netif_rx (skb);
-  __mutex_unlock (&net_bh_lock);
-
- *amount = datalen;
-
- __mutex_unlock (&tdev->lock);
- return 0;
-}
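-
-/* Conversely, each write () on the node injects one complete IP datagram
-   into the stack via netif_rx above.  A hedged sketch; "/dev/tun0" and
-   the caller-supplied packet buffer are assumptions:
-
-     #include <fcntl.h>
-     #include <unistd.h>
-
-     ssize_t
-     inject_packet (const void *pkt, size_t len)
-     {
-       ssize_t n;
-       int fd = open ("/dev/tun0", O_WRONLY);
-       if (fd < 0)
-         return -1;
-       n = write (fd, pkt, len);
-       close (fd);
-       return n;
-     }
-*/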
-
-/* Tell how much data can be read from the object without blocking for
-   a "long time" (this should be the same meaning of "long time" used
-   by the nonblocking flag).  */
-kern_return_t
-trivfs_S_io_readable (struct trivfs_protid *cred,
- mach_port_t reply, mach_msg_type_name_t replytype,
- mach_msg_type_number_t *amount)
-{
- struct tunnel_device *tdev;
- struct sk_buff *skb;
-
- /* Deny access if they have bad credentials. */
- if (! cred)
- return EOPNOTSUPP;
- else if (! (cred->po->openmodes & O_READ))
- return EBADF;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- tdev = (struct tunnel_device *) cred->po->cntl->hook;
-
- __mutex_lock (&tdev->lock);
-
-  /* XXX: Now return the length of the next entry in the queue.
-     From the BSD manual:
-     The tunnel device, normally /dev/tunN, is exclusive-open (it cannot be
-     opened if it is already open) and is restricted to the super-user.  A
-     read() call will return an error (EHOSTDOWN) if the interface is not
-     ``ready'' (which means that the control device is open and the
-     interface's address has been set).  Once the interface is ready,
-     read() will return a packet if one is available; if not, it will
-     either block until one is or return EWOULDBLOCK, depending on whether
-     non-blocking I/O has been enabled.  If the packet is longer than is
-     allowed for in the buffer passed to read(), the extra data will be
-     silently dropped.
-  */
-
-  /* Peek at the head of the queue by dequeuing the first packet and
-     immediately putting it back; tdev->lock is held, so nothing can
-     race with us in between.  */
-  skb = skb_dequeue (&tdev->xq);
-  if (skb)
-    {
-      *amount = skb->len;
-      skb_queue_head (&tdev->xq, skb);
-    }
-  else
-    *amount = 0;
-
- __mutex_unlock (&tdev->lock);
-
- return 0;
-}
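-
-/* On the Hurd, glibc implements the FIONREAD ioctl in terms of
-   io_readable, so a client can poll the length of the next queued
-   packet.  A sketch (FD is an open tunnel node):
-
-     #include <sys/ioctl.h>
-
-     int
-     packet_pending (int fd)
-     {
-       int pending = 0;
-       if (ioctl (fd, FIONREAD, &pending) < 0)
-         return -1;
-       return pending;
-     }
-*/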
-
-/* SELECT_TYPE is the bitwise OR of SELECT_READ, SELECT_WRITE, and SELECT_URG.
- Block until one of the indicated types of i/o can be done "quickly", and
- return the types that are then available. ID_TAG is returned as passed; it
- is just for the convenience of the user in matching up reply messages with
- specific requests sent. */
-error_t
-trivfs_S_io_select (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- int *type)
-{
- struct tunnel_device *tdev;
-
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- tdev = (struct tunnel_device *) cred->po->cntl->hook;
-
- /* We only deal with SELECT_READ here. */
- if (*type & ~SELECT_READ)
- return EINVAL;
-
- if (*type == 0)
- return 0;
-
- __mutex_lock (&tdev->lock);
-
- while (1)
- {
- if (skb_queue_len (&tdev->xq) != 0)
- {
- *type = SELECT_READ;
- __mutex_unlock (&tdev->lock);
- return 0;
- }
-
- ports_interrupt_self_on_port_death (cred, reply);
- tdev->read_blocked = 1;
- if (hurd_condition_wait (&tdev->select_alert, &tdev->lock))
- {
- *type = 0;
- __mutex_unlock (&tdev->lock);
- return EINTR;
- }
- }
-}
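-
-/* Only SELECT_READ is supported above, so a client simply waits for the
-   next packet with ordinary select ().  A sketch (FD is an open tunnel
-   node; a positive return means the next read () will not block):
-
-     #include <sys/select.h>
-
-     int
-     wait_for_packet (int fd)
-     {
-       fd_set rfds;
-       FD_ZERO (&rfds);
-       FD_SET (fd, &rfds);
-       return select (fd + 1, &rfds, 0, 0, 0);
-     }
-*/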
-
-/* Change current read/write offset */
-error_t
-trivfs_S_io_seek (struct trivfs_protid *cred,
- mach_port_t reply, mach_msg_type_name_t reply_type,
- off_t offs, int whence, off_t *new_offs)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return ESPIPE;
-}
-
-/* Change the size of the file. If the size increases, new blocks are
- zero-filled. After successful return, it is safe to reference mapped
- areas of the file up to NEW_SIZE. */
-error_t
-trivfs_S_file_set_size (struct trivfs_protid *cred,
- mach_port_t reply, mach_msg_type_name_t reply_type,
- off_t size)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return size == 0 ? 0 : EINVAL;
-}
-
-/* These four routines modify the O_APPEND, O_ASYNC, O_FSYNC, and
- O_NONBLOCK bits for the IO object. In addition, io_get_openmodes
- will tell you which of O_READ, O_WRITE, and O_EXEC the object can
- be used for. The O_ASYNC bit affects icky async I/O; good async
- I/O is done through io_async which is orthogonal to these calls. */
-error_t
-trivfs_S_io_set_all_openmodes(struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- int mode)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return 0;
-}
-
-error_t
-trivfs_S_io_set_some_openmodes (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- int bits)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return 0;
-}
-
-error_t
-trivfs_S_io_clear_some_openmodes (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- int bits)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return 0;
-}
-
-error_t
-trivfs_S_io_get_owner (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t reply_type,
- pid_t *owner)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- *owner = 0;
- return 0;
-}
-
-error_t
-trivfs_S_io_mod_owner (struct trivfs_protid *cred,
- mach_port_t reply, mach_msg_type_name_t reply_type,
- pid_t owner)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return EINVAL;
-}
-
-/* Return objects mapping the data underlying this memory object. If
- the object can be read then memobjrd will be provided; if the
- object can be written then memobjwr will be provided. For objects
- where read data and write data are the same, these objects will be
- equal, otherwise they will be disjoint. Servers are permitted to
- implement io_map but not io_map_cntl. Some objects do not provide
- mapping; they will set none of the ports and return an error. Such
- objects can still be accessed by io_read and io_write. */
-error_t
-trivfs_S_io_map (struct trivfs_protid *cred,
- mach_port_t reply,
- mach_msg_type_name_t replyPoly,
- memory_object_t *rdobj,
- mach_msg_type_name_t *rdtype,
- memory_object_t *wrobj,
- mach_msg_type_name_t *wrtype)
-{
- if (!cred)
- return EOPNOTSUPP;
-
- if (cred->pi.class != tunnel_class)
- return EOPNOTSUPP;
-
- return EINVAL;
-}