Diffstat (limited to 'pfinet/linux-src/net')
71 files changed, 48290 insertions, 0 deletions
diff --git a/pfinet/linux-src/net/core/Makefile b/pfinet/linux-src/net/core/Makefile new file mode 100644 index 00000000..5df65cd2 --- /dev/null +++ b/pfinet/linux-src/net/core/Makefile @@ -0,0 +1,41 @@ +# +# Makefile for the Linux networking core. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := core.o + +O_OBJS := sock.o skbuff.o iovec.o datagram.o scm.o + +ifeq ($(CONFIG_SYSCTL),y) +ifeq ($(CONFIG_NET),y) +O_OBJS += sysctl_net_core.o +endif +endif + +ifdef CONFIG_FILTER +O_OBJS += filter.o +endif + +ifdef CONFIG_NET + +O_OBJS += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o + +ifdef CONFIG_FIREWALL +OX_OBJS += firewall.o +endif + +endif + +ifdef CONFIG_NET_PROFILE +OX_OBJS += profile.o +endif + +include $(TOPDIR)/Rules.make + +tar: + tar -cvf /dev/f1 . diff --git a/pfinet/linux-src/net/core/datagram.c b/pfinet/linux-src/net/core/datagram.c new file mode 100644 index 00000000..9bb68fa4 --- /dev/null +++ b/pfinet/linux-src/net/core/datagram.c @@ -0,0 +1,249 @@ +/* + * SUCS NET3: + * + * Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top + * of these would make sense. Not tonight however 8-). + * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layer all have identical poll code and mostly + * identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it. + * + * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code) + * + * Fixes: + * Alan Cox : NULL return from skb_peek_copy() understood + * Alan Cox : Rewrote skb_read_datagram to avoid the skb_peek_copy stuff. + * Alan Cox : Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but + * AX.25 now works right, and SPX is feasible. + * Alan Cox : Fixed write poll of non IP protocol crash. + * Florian La Roche: Changed for my new skbuff handling. + * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. + * Linus Torvalds : BSD semantic fixes. + * Alan Cox : Datagram iovec handling + * Darryl Miles : Fixed non-blocking SOCK_STREAM. + * Alan Cox : POSIXisms + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/poll.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> + + +/* + * Wait for a packet.. + * + * Interrupts off so that no packet arrives before we begin sleeping. + * Otherwise we might miss our wake up + */ + +static inline void wait_for_packet(struct sock * sk) +{ + struct wait_queue wait = { current, NULL }; + + add_wait_queue(sk->sleep, &wait); + current->state = TASK_INTERRUPTIBLE; + + if (skb_peek(&sk->receive_queue) == NULL) + schedule(); + + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); +} + +/* + * Is a socket 'connection oriented' ? 
+ */ + +static inline int connection_based(struct sock *sk) +{ + return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM); +} + +/* + * Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible + * races. This replaces identical code in packet,raw and udp, as well as the IPX + * AX.25 and Appletalk. It also finally fixes the long standing peek and read + * race for datagram sockets. If you alter this routine remember it must be + * re-entrant. + * + * This function will lock the socket if a skb is returned, so the caller + * needs to unlock the socket in that case (usually by calling skb_free_datagram) + * + * * It does not lock socket since today. This function is + * * free of race conditions. This measure should/can improve + * * significantly datagram socket latencies at high loads, + * * when data copying to user space takes lots of time. + * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet + * * 8) Great win.) + * * --ANK (980729) + * + * The order of the tests when we find no data waiting are specified + * quite explicitly by POSIX 1003.1g, don't change them without having + * the standard around please. + */ + +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) +{ + int error; + struct sk_buff *skb; + + /* Caller is allowed not to check sk->err before skb_recv_datagram() */ + error = sock_error(sk); + if (error) + goto no_packet; + +restart: + while(skb_queue_empty(&sk->receive_queue)) /* No data */ + { + /* Socket errors? */ + error = sock_error(sk); + if (error) + goto no_packet; + + /* Socket shut down? */ + if (sk->shutdown & RCV_SHUTDOWN) + goto no_packet; + + /* Sequenced packets can come disconnected. If so we report the problem */ + error = -ENOTCONN; + if(connection_based(sk) && sk->state!=TCP_ESTABLISHED) + goto no_packet; + + /* handle signals */ + error = -ERESTARTSYS; + if (signal_pending(current)) + goto no_packet; + + /* User doesn't want to wait */ + error = -EAGAIN; + if (noblock) + goto no_packet; + + wait_for_packet(sk); + } + + /* Again only user level code calls this function, so nothing interrupt level + will suddenly eat the receive_queue */ + if (flags & MSG_PEEK) + { + unsigned long cpu_flags; + + /* It is the only POTENTIAL race condition + in this function. skb may be stolen by + another receiver after peek, but before + incrementing use count, provided kernel + is reentearble (it is not) or this function + is called by interrupts. + + Protect it with global skb spinlock, + though for now even this is overkill. + --ANK (980728) + */ + spin_lock_irqsave(&skb_queue_lock, cpu_flags); + skb = skb_peek(&sk->receive_queue); + if(skb!=NULL) + atomic_inc(&skb->users); + spin_unlock_irqrestore(&skb_queue_lock, cpu_flags); + } else + skb = skb_dequeue(&sk->receive_queue); + + if (!skb) /* Avoid race if someone beats us to the data */ + goto restart; + return skb; + +no_packet: + *err = error; + return NULL; +} + +void skb_free_datagram(struct sock * sk, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/* + * Copy a datagram to a linear buffer. + */ + +int skb_copy_datagram(struct sk_buff *skb, int offset, char *to, int size) +{ + int err = -EFAULT; + + if (!copy_to_user(to, skb->h.raw + offset, size)) + err = 0; + return err; +} + + +/* + * Copy a datagram to an iovec. + * Note: the iovec is modified during the copy. 
+ */ + +int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to, + int size) +{ + return memcpy_toiovec(to, skb->h.raw + offset, size); +} + +/* + * Datagram poll: Again totally generic. This also handles + * sequenced packet sockets providing the socket receive queue + * is only ever holding data ready to receive. + * + * Note: when you _don't_ use this routine for this protocol, + * and you use a different write policy from sock_writeable() + * then please supply your own write_space callback. + */ + +unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask; + + poll_wait(file, sk->sleep, wait); + mask = 0; + + /* exceptional events? */ + if (sk->err || !skb_queue_empty(&sk->error_queue)) + mask |= POLLERR; + if (sk->shutdown & RCV_SHUTDOWN) + mask |= POLLHUP; + + /* readable? */ + if (!skb_queue_empty(&sk->receive_queue)) + mask |= POLLIN | POLLRDNORM; + + /* Connection-based need to check for termination and startup */ + if (connection_based(sk)) { + if (sk->state==TCP_CLOSE) + mask |= POLLHUP; + /* connection hasn't started yet? */ + if (sk->state == TCP_SYN_SENT) + return mask; + } + + /* writable? */ + if (sock_writeable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + sk->socket->flags |= SO_NOSPACE; + + return mask; +} diff --git a/pfinet/linux-src/net/core/dev.c b/pfinet/linux-src/net/core/dev.c new file mode 100644 index 00000000..cc9584a1 --- /dev/null +++ b/pfinet/linux-src/net/core/dev.c @@ -0,0 +1,2026 @@ +/* + * NET3 Protocol independent device support routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Derived from the non IP parts of dev.c 1.0.19 + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * + * Additional Authors: + * Florian la Roche <rzsfl@rz.uni-sb.de> + * Alan Cox <gw4pts@gw4pts.ampr.org> + * David Hinds <dhinds@allegro.stanford.edu> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * Adam Sulmicki <adam@cfar.umd.edu> + * + * Changes: + * Marcelo Tosatti <marcelo@conectiva.com.br> : dont accept mtu 0 or < + * Alan Cox : device private ioctl copies fields back. + * Alan Cox : Transmit queue code does relevant stunts to + * keep the queue safe. + * Alan Cox : Fixed double lock. + * Alan Cox : Fixed promisc NULL pointer trap + * ???????? : Support the full private ioctl range + * Alan Cox : Moved ioctl permission check into drivers + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI + * Alan Cox : 100 backlog just doesn't cut it when + * you start doing multicast video 8) + * Alan Cox : Rewrote net_bh and list manager. + * Alan Cox : Fix ETH_P_ALL echoback lengths. + * Alan Cox : Took out transmit every packet pass + * Saved a few bytes in the ioctl handler + * Alan Cox : Network driver sets packet type before calling netif_rx. Saves + * a function call a packet. + * Alan Cox : Hashed net_bh() + * Richard Kooijman: Timestamp fixes. + * Alan Cox : Wrong field in SIOCGIFDSTADDR + * Alan Cox : Device lock protection. + * Alan Cox : Fixed nasty side effect of device close changes. + * Rudi Cilibrasi : Pass the right thing to set_mac_address() + * Dave Miller : 32bit quantity for the device lock to make it work out + * on a Sparc. 
+ * Bjorn Ekwall : Added KERNELD hack. + * Alan Cox : Cleaned up the backlog initialise. + * Craig Metz : SIOCGIFCONF fix if space for under + * 1 device. + * Thomas Bogendoerfer : Return ENODEV for dev_open, if there + * is no device open function. + * Andi Kleen : Fix error reporting for SIOCGIFCONF + * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF + * Cyrus Durgin : Cleaned for KMOD + * Adam Sulmicki : Bug Fix : Network Device Unload + * A network device unload needs to purge + * the backlog queue. + * Paul Rusty Russel : SIOCSIFNAME + */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/notifier.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/rtnetlink.h> +#include <net/slhc.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <net/br.h> +#include <net/dst.h> +#include <net/pkt_sched.h> +#include <net/profile.h> +#include <linux/init.h> +#include <linux/kmod.h> +#ifdef CONFIG_NET_RADIO +#include <linux/wireless.h> +#endif /* CONFIG_NET_RADIO */ +#ifdef CONFIG_PLIP +extern int plip_init(void); +#endif + +NET_PROFILE_DEFINE(dev_queue_xmit) +NET_PROFILE_DEFINE(net_bh) +NET_PROFILE_DEFINE(net_bh_skb) + + +const char *if_port_text[] = { + "unknown", + "BNC", + "10baseT", + "AUI", + "100baseT", + "100baseTX", + "100baseFX" +}; + +/* + * The list of packet types we will receive (as opposed to discard) + * and the routines to invoke. + * + * Why 16. Because with 16 the only overlap we get on a hash of the + * low nibble of the protocol value is RARP/SNAP/X.25. + * + * 0800 IP + * 0001 802.3 + * 0002 AX.25 + * 0004 802.2 + * 8035 RARP + * 0005 SNAP + * 0805 X.25 + * 0806 ARP + * 8137 IPX + * 0009 Localtalk + * 86DD IPv6 + */ + +struct packet_type *ptype_base[16]; /* 16 way hashed list */ +struct packet_type *ptype_all = NULL; /* Taps */ + +/* + * Device list lock. Setting it provides that interface + * will not disappear unexpectedly while kernel sleeps. + */ + +atomic_t dev_lockct = ATOMIC_INIT(0); + +/* + * Our notifier list + */ + +static struct notifier_block *netdev_chain=NULL; + +/* + * Device drivers call our routines to queue packets here. We empty the + * queue in the bottom half handler. + */ + +static struct sk_buff_head backlog; + +#ifdef CONFIG_NET_FASTROUTE +int netdev_fastroute; +int netdev_fastroute_obstacles; +struct net_fastroute_stats dev_fastroute_stat; +#endif + +static void dev_clear_backlog(struct device *dev); + + +/****************************************************************************************** + + Protocol management and registration routines + +*******************************************************************************************/ + +/* + * For efficiency + */ + +int netdev_nit=0; + +/* + * Add a protocol ID to the list. Now that the input handler is + * smarter we can dispense with all the messy stuff that used to be + * here. + * + * BEWARE!!! Protocol handlers, mangling input packets, + * MUST BE last in hash buckets and checking protocol handlers + * MUST start from promiscous ptype_all chain in net_bh. + * It is true now, do not change it. 
+ * Explantion follows: if protocol handler, mangling packet, will + * be the first on list, it is not able to sense, that packet + * is cloned and should be copied-on-write, so that it will + * change it and subsequent readers will get broken packet. + * --ANK (980803) + */ + +void dev_add_pack(struct packet_type *pt) +{ + int hash; +#ifdef CONFIG_NET_FASTROUTE + /* Hack to detect packet socket */ + if (pt->data) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(pt->dev); + } +#endif + if(pt->type==htons(ETH_P_ALL)) + { + netdev_nit++; + pt->next=ptype_all; + ptype_all=pt; + } + else + { + hash=ntohs(pt->type)&15; + pt->next = ptype_base[hash]; + ptype_base[hash] = pt; + } +} + + +/* + * Remove a protocol ID from the list. + */ + +void dev_remove_pack(struct packet_type *pt) +{ + struct packet_type **pt1; + if(pt->type==htons(ETH_P_ALL)) + { + netdev_nit--; + pt1=&ptype_all; + } + else + pt1=&ptype_base[ntohs(pt->type)&15]; + for(; (*pt1)!=NULL; pt1=&((*pt1)->next)) + { + if(pt==(*pt1)) + { + *pt1=pt->next; + synchronize_bh(); +#ifdef CONFIG_NET_FASTROUTE + if (pt->data) + netdev_fastroute_obstacles--; +#endif + return; + } + } + printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); +} + +/***************************************************************************************** + + Device Interface Subroutines + +******************************************************************************************/ + +/* + * Find an interface by name. + */ + +struct device *dev_get(const char *name) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (strcmp(dev->name, name) == 0) + return(dev); + } + return NULL; +} + +struct device * dev_get_by_index(int ifindex) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (dev->ifindex == ifindex) + return(dev); + } + return NULL; +} + +struct device *dev_getbyhwaddr(unsigned short type, char *ha) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (dev->type == type && + memcmp(dev->dev_addr, ha, dev->addr_len) == 0) + return(dev); + } + return(NULL); +} + +/* + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. Not efficient for many devices, not called a lot.. + */ + +int dev_alloc_name(struct device *dev, const char *name) +{ + int i; + /* + * If you need over 100 please also fix the algorithm... + */ + for(i=0;i<100;i++) + { + sprintf(dev->name,name,i); + if(dev_get(dev->name)==NULL) + return i; + } + return -ENFILE; /* Over 100 of the things .. bail out! */ +} + +struct device *dev_alloc(const char *name, int *err) +{ + struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL); + if(dev==NULL) + { + *err=-ENOBUFS; + return NULL; + } + dev->name=(char *)(dev+1); /* Name string space */ + *err=dev_alloc_name(dev,name); + if(*err<0) + { + kfree(dev); + return NULL; + } + return dev; +} + +void netdev_state_change(struct device *dev) +{ + if (dev->flags&IFF_UP) + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); +} + + +/* + * Find and possibly load an interface. + */ + +#ifdef CONFIG_KMOD + +void dev_load(const char *name) +{ + if(!dev_get(name) && capable(CAP_SYS_MODULE)) + request_module(name); +} + +#else + +extern inline void dev_load(const char *unused){;} + +#endif + +static int default_rebuild_header(struct sk_buff *skb) +{ + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? 
skb->dev->name : "NULL!!!"); + kfree_skb(skb); + return 1; +} + +/* + * Prepare an interface for use. + */ + +int dev_open(struct device *dev) +{ + int ret = 0; + + /* + * Is it already up? + */ + + if (dev->flags&IFF_UP) + return 0; + + /* + * Call device private open method + */ + + if (dev->open) + ret = dev->open(dev); + + /* + * If it went open OK then: + */ + + if (ret == 0) + { + /* + * nil rebuild_header routine, + * that should be never called and used as just bug trap. + */ + + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + + /* + * Set the flags. + */ + dev->flags |= (IFF_UP | IFF_RUNNING); + + /* + * Initialize multicasting status + */ + dev_mc_upload(dev); + + /* + * Wakeup transmit queue engine + */ + dev_activate(dev); + + /* + * ... and announce new interface. + */ + notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + + } + return(ret); +} + +#ifdef CONFIG_NET_FASTROUTE + +static __inline__ void dev_do_clear_fastroute(struct device *dev) +{ + if (dev->accept_fastpath) { + int i; + + for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) + dst_release_irqwait(xchg(dev->fastpath+i, NULL)); + } +} + +void dev_clear_fastroute(struct device *dev) +{ + if (dev) { + dev_do_clear_fastroute(dev); + } else { + for (dev = dev_base; dev; dev = dev->next) + dev_do_clear_fastroute(dev); + } +} +#endif + +/* + * Completely shutdown an interface. + */ + +int dev_close(struct device *dev) +{ + if (!(dev->flags&IFF_UP)) + return 0; + + dev_deactivate(dev); + + dev_lock_wait(); + + /* + * Call the device specific close. This cannot fail. + * Only if device is UP + */ + + if (dev->stop) + dev->stop(dev); + + if (dev->start) + printk("dev_close: bug %s still running\n", dev->name); + + /* + * Device is now down. + */ + dev_clear_backlog(dev); + + dev->flags&=~(IFF_UP|IFF_RUNNING); +#ifdef CONFIG_NET_FASTROUTE + dev_clear_fastroute(dev); +#endif + + /* + * Tell people we are going down + */ + notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + + return(0); +} + + +/* + * Device change register/unregister. These are not inline or static + * as we export them to the world. + */ + +int register_netdevice_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&netdev_chain, nb); +} + +int unregister_netdevice_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&netdev_chain,nb); +} + +/* + * Support routine. Sends outgoing frames to any network + * taps currently in use. + */ + +void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev) +{ + struct packet_type *ptype; + get_fast_time(&skb->stamp); + + for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) + { + /* Never send packets back to the socket + * they originated from - MvS (miquels@drinkel.ow.org) + */ + if ((ptype->dev == dev || !ptype->dev) && + ((struct sock *)ptype->data != skb->sk)) + { + struct sk_buff *skb2; + if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) + break; + + /* Code, following below is wrong. + + The only reason, why it does work is that + ONLY packet sockets receive outgoing + packets. If such a packet will be (occasionally) + received by normal packet handler, which expects + that mac header is pulled... + */ + + /* More sensible variant. skb->nh should be correctly + set by sender, so that the second statement is + just protection against buggy protocols. 
+ */ + skb2->mac.raw = skb2->data; + + if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) { + if (net_ratelimit()) + printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name); + skb2->nh.raw = skb2->data; + if (dev->hard_header) + skb2->nh.raw += dev->hard_header_len; + } + + skb2->h.raw = skb2->nh.raw; + skb2->pkt_type = PACKET_OUTGOING; + ptype->func(skb2, skb->dev, ptype); + } + } +} + +/* + * Fast path for loopback frames. + */ + +void dev_loopback_xmit(struct sk_buff *skb) +{ + struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC); + if (newskb==NULL) + return; + + newskb->mac.raw = newskb->data; + skb_pull(newskb, newskb->nh.raw - newskb->data); + newskb->pkt_type = PACKET_LOOPBACK; + newskb->ip_summed = CHECKSUM_UNNECESSARY; + if (newskb->dst==NULL) + printk(KERN_DEBUG "BUG: packet without dst looped back 1\n"); + netif_rx(newskb); +} + +int dev_queue_xmit(struct sk_buff *skb) +{ + struct device *dev = skb->dev; + struct Qdisc *q; + +#ifdef CONFIG_NET_PROFILE + start_bh_atomic(); + NET_PROFILE_ENTER(dev_queue_xmit); +#endif + + start_bh_atomic(); + q = dev->qdisc; + if (q->enqueue) { + q->enqueue(skb, q); + qdisc_wakeup(dev); + end_bh_atomic(); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + + return 0; + } + + /* The device has no queue. Common case for software devices: + loopback, all the sorts of tunnels... + + Really, it is unlikely that bh protection is necessary here: + virtual devices do not generate EOI events. + However, it is possible, that they rely on bh protection + made by us here. + */ + if (dev->flags&IFF_UP) { + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); + if (dev->hard_start_xmit(skb, dev) == 0) { + end_bh_atomic(); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + + return 0; + } + if (net_ratelimit()) + printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name); + } + end_bh_atomic(); + + kfree_skb(skb); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + + return 0; +} + + +/*======================================================================= + Receiver rotutines + =======================================================================*/ + +int netdev_dropping = 0; +int netdev_max_backlog = 300; +atomic_t netdev_rx_dropped; +#ifdef CONFIG_CPU_IS_SLOW +int net_cpu_congestion; +#endif + +#ifdef CONFIG_NET_HW_FLOWCONTROL +int netdev_throttle_events; +static unsigned long netdev_fc_mask = 1; +unsigned long netdev_fc_xoff = 0; + +static struct +{ + void (*stimul)(struct device *); + struct device *dev; +} netdev_fc_slots[32]; + +int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)) +{ + int bit = 0; + unsigned long flags; + + save_flags(flags); + cli(); + if (netdev_fc_mask != ~0UL) { + bit = ffz(netdev_fc_mask); + netdev_fc_slots[bit].stimul = stimul; + netdev_fc_slots[bit].dev = dev; + set_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + restore_flags(flags); + return bit; +} + +void netdev_unregister_fc(int bit) +{ + unsigned long flags; + + save_flags(flags); + cli(); + if (bit > 0) { + netdev_fc_slots[bit].stimul = NULL; + netdev_fc_slots[bit].dev = NULL; + clear_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + restore_flags(flags); +} + +static void netdev_wakeup(void) +{ + unsigned long xoff; + + cli(); + xoff = netdev_fc_xoff; + netdev_fc_xoff = 0; + netdev_dropping = 0; + netdev_throttle_events++; + while (xoff) 
{ + int i = ffz(~xoff); + xoff &= ~(1<<i); + netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); + } + sti(); +} +#endif + +static void dev_clear_backlog(struct device *dev) +{ + struct sk_buff *prev, *curr; + + /* + * + * Let now clear backlog queue. -AS + * + * We are competing here both with netif_rx() and net_bh(). + * We don't want either of those to mess with skb ptrs + * while we work on them, thus cli()/sti(). + * + * It looks better to use net_bh trick, at least + * to be sure, that we keep interrupt latency really low. --ANK (980727) + */ + + if (backlog.qlen) { + start_bh_atomic(); + curr = backlog.next; + while ( curr != (struct sk_buff *)(&backlog) ) { + unsigned long flags; + curr=curr->next; + if ( curr->prev->dev == dev ) { + prev = curr->prev; + spin_lock_irqsave(&skb_queue_lock, flags); + __skb_unlink(prev, &backlog); + spin_unlock_irqrestore(&skb_queue_lock, flags); + kfree_skb(prev); + } + } + end_bh_atomic(); +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (netdev_dropping) + netdev_wakeup(); +#else + netdev_dropping = 0; +#endif + } +} + +/* + * Receive a packet from a device driver and queue it for the upper + * (protocol) levels. It always succeeds. + */ + +void netif_rx(struct sk_buff *skb) +{ +#ifndef CONFIG_CPU_IS_SLOW + if(skb->stamp.tv_sec==0) + get_fast_time(&skb->stamp); +#else + skb->stamp = xtime; +#endif + + /* The code is rearranged so that the path is the most + short when CPU is congested, but is still operating. + */ + + if (backlog.qlen <= netdev_max_backlog) { + if (backlog.qlen) { + if (netdev_dropping == 0) { + skb_queue_tail(&backlog,skb); + mark_bh(NET_BH); + return; + } + atomic_inc(&netdev_rx_dropped); + kfree_skb(skb); + return; + } +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (netdev_dropping) + netdev_wakeup(); +#else + netdev_dropping = 0; +#endif + skb_queue_tail(&backlog,skb); + mark_bh(NET_BH); + return; + } + netdev_dropping = 1; + atomic_inc(&netdev_rx_dropped); + kfree_skb(skb); +} + +#ifdef CONFIG_BRIDGE +static inline void handle_bridge(struct sk_buff *skb, unsigned short type) +{ + if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type))) + { + /* + * We pass the bridge a complete frame. This means + * recovering the MAC header first. + */ + + int offset; + + skb=skb_clone(skb, GFP_ATOMIC); + if(skb==NULL) + return; + + offset=skb->data-skb->mac.raw; + skb_push(skb,offset); /* Put header back on for bridge */ + + if(br_receive_frame(skb)) + return; + kfree_skb(skb); + } + return; +} +#endif + + +/* + * When we are called the queue is ready to grab, the interrupts are + * on and hardware can interrupt and queue to the receive queue as we + * run with no problems. + * This is run as a bottom half after an interrupt handler that does + * mark_bh(NET_BH); + */ + +void net_bh(void) +{ + struct packet_type *ptype; + struct packet_type *pt_prev; + unsigned short type; + unsigned long start_time = jiffies; +#ifdef CONFIG_CPU_IS_SLOW + static unsigned long start_busy = 0; + static unsigned long ave_busy = 0; + + if (start_busy == 0) + start_busy = start_time; + net_cpu_congestion = ave_busy>>8; +#endif + + NET_PROFILE_ENTER(net_bh); + /* + * Can we send anything now? We want to clear the + * decks for any more sends that get done as we + * process the input. This also minimises the + * latency on a transmit interrupt bh. + */ + + if (qdisc_head.forw != &qdisc_head) + qdisc_run_queues(); + + /* + * Any data left to process. 
This may occur because a + * mark_bh() is done after we empty the queue including + * that from the device which does a mark_bh() just after + */ + + /* + * While the queue is not empty.. + * + * Note that the queue never shrinks due to + * an interrupt, so we can do this test without + * disabling interrupts. + */ + + while (!skb_queue_empty(&backlog)) + { + struct sk_buff * skb; + + /* Give chance to other bottom halves to run */ + if (jiffies - start_time > 1) + goto net_bh_break; + + /* + * We have a packet. Therefore the queue has shrunk + */ + skb = skb_dequeue(&backlog); + +#ifdef CONFIG_CPU_IS_SLOW + if (ave_busy > 128*16) { + kfree_skb(skb); + while ((skb = skb_dequeue(&backlog)) != NULL) + kfree_skb(skb); + break; + } +#endif + + +#if 0 + NET_PROFILE_SKB_PASSED(skb, net_bh_skb); +#endif +#ifdef CONFIG_NET_FASTROUTE + if (skb->pkt_type == PACKET_FASTROUTE) { + dev_queue_xmit(skb); + continue; + } +#endif + + /* + * Bump the pointer to the next structure. + * + * On entry to the protocol layer. skb->data and + * skb->nh.raw point to the MAC and encapsulated data + */ + + /* XXX until we figure out every place to modify.. */ + skb->h.raw = skb->nh.raw = skb->data; + + if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) { + printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol); + kfree_skb(skb); + continue; + } + + /* + * Fetch the packet protocol ID. + */ + + type = skb->protocol; + +#ifdef CONFIG_BRIDGE + /* + * If we are bridging then pass the frame up to the + * bridging code (if this protocol is to be bridged). + * If it is bridged then move on + */ + handle_bridge(skb, type); +#endif + + /* + * We got a packet ID. Now loop over the "known protocols" + * list. There are two lists. The ptype_all list of taps (normally empty) + * and the main protocol list which is hashed perfectly for normal protocols. + */ + + pt_prev = NULL; + for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) + { + if (!ptype->dev || ptype->dev == skb->dev) { + if(pt_prev) + { + struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); + if(skb2) + pt_prev->func(skb2,skb->dev, pt_prev); + } + pt_prev=ptype; + } + } + + for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) + { + if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev)) + { + /* + * We already have a match queued. Deliver + * to it and then remember the new match + */ + if(pt_prev) + { + struct sk_buff *skb2; + + skb2=skb_clone(skb, GFP_ATOMIC); + + /* + * Kick the protocol handler. This should be fast + * and efficient code. + */ + + if(skb2) + pt_prev->func(skb2, skb->dev, pt_prev); + } + /* Remember the current last to do */ + pt_prev=ptype; + } + } /* End of protocol list loop */ + + /* + * Is there a last item to send to ? + */ + + if(pt_prev) + pt_prev->func(skb, skb->dev, pt_prev); + /* + * Has an unknown packet has been received ? + */ + + else { + kfree_skb(skb); + } + } /* End of queue loop */ + + /* + * We have emptied the queue + */ + + /* + * One last output flush. 
+ */ + + if (qdisc_head.forw != &qdisc_head) + qdisc_run_queues(); + +#ifdef CONFIG_CPU_IS_SLOW + if (1) { + unsigned long start_idle = jiffies; + ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4); + start_busy = 0; + } +#endif +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (netdev_dropping) + netdev_wakeup(); +#else + netdev_dropping = 0; +#endif + NET_PROFILE_LEAVE(net_bh); + return; + +net_bh_break: + mark_bh(NET_BH); + NET_PROFILE_LEAVE(net_bh); + return; +} + +/* Protocol dependent address dumping routines */ + +static gifconf_func_t * gifconf_list [NPROTO]; + +int register_gifconf(unsigned int family, gifconf_func_t * gifconf) +{ + if (family>=NPROTO) + return -EINVAL; + gifconf_list[family] = gifconf; + return 0; +} + + +/* + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * This call is useful, but I'd remove it too. + * + * The reason is purely aestetical, it is the only call + * from SIOC* family using struct ifreq in reversed manner. + * Besides that, it is pretty silly to put "drawing" facility + * to kernel, it is useful only to print ifindices + * in readable form, is not it? --ANK + * + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. Without + * it, we would have to search all the interfaces to find a + * match. --pb + */ + +static int dev_ifname(struct ifreq *arg) +{ + struct device *dev; + struct ifreq ifr; + int err; + + /* + * Fetch the caller's info block. + */ + + err = copy_from_user(&ifr, arg, sizeof(struct ifreq)); + if (err) + return -EFAULT; + + dev = dev_get_by_index(ifr.ifr_ifindex); + if (!dev) + return -ENODEV; + + strcpy(ifr.ifr_name, dev->name); + + err = copy_to_user(arg, &ifr, sizeof(struct ifreq)); + return (err)?-EFAULT:0; +} + +/* + * Perform a SIOCGIFCONF call. This structure will change + * size eventually, and there is nothing I can do about it. + * Thus we will need a 'compatibility mode'. + */ + +static int dev_ifconf(char *arg) +{ + struct ifconf ifc; + struct device *dev; + char *pos; + int len; + int total; + int i; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) + return -EFAULT; + + pos = ifc.ifc_buf; + len = ifc.ifc_len; + + /* + * Loop over the interfaces, and write an info block for each. + */ + + total = 0; + for (dev = dev_base; dev != NULL; dev = dev->next) { + for (i=0; i<NPROTO; i++) { + if (gifconf_list[i]) { + int done; + if (pos==NULL) { + done = gifconf_list[i](dev, NULL, 0); + } else { + done = gifconf_list[i](dev, pos+total, len-total); + } + if (done<0) + return -EFAULT; + total += done; + } + } + } + + /* + * All done. Write the updated control block back to the caller. + */ + ifc.ifc_len = total; + + if (copy_to_user(arg, &ifc, sizeof(struct ifconf))) + return -EFAULT; + + /* + * Both BSD and Solaris return 0 here, so we do too. + */ + return 0; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ + +#ifdef CONFIG_PROC_FS +static int sprintf_stats(char *buffer, struct device *dev) +{ + struct net_device_stats *stats = (dev->get_stats ? 
dev->get_stats(dev): NULL); + int size; + + if (stats) + size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + dev->name, + stats->rx_bytes, + stats->rx_packets, stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, + stats->tx_packets, stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + stats->tx_aborted_errors + + stats->tx_window_errors + stats->tx_heartbeat_errors, + stats->tx_compressed); + else + size = sprintf(buffer, "%6s: No statistics available.\n", dev->name); + + return size; +} + +/* + * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface + * to create /proc/net/dev + */ + +int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t begin=0; + off_t pos=0; + int size; + + struct device *dev; + + + size = sprintf(buffer, + "Inter-| Receive | Transmit\n" + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n"); + + pos+=size; + len+=size; + + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + size = sprintf_stats(buffer+len, dev); + len+=size; + pos=begin+len; + + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Start slop */ + if(len>length) + len=length; /* Ending slop */ + return len; +} + +static int dev_proc_stats(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + int len; + + len = sprintf(buffer, "%08x %08x %08x %08x %08x\n", + atomic_read(&netdev_rx_dropped), +#ifdef CONFIG_NET_HW_FLOWCONTROL + netdev_throttle_events, +#else + 0, +#endif +#ifdef CONFIG_NET_FASTROUTE + dev_fastroute_stat.hits, + dev_fastroute_stat.succeed, + dev_fastroute_stat.deferred +#else + 0, 0, 0 +#endif + ); + + len -= offset; + + if (len > length) + len = length; + if(len < 0) + len = 0; + + *start = buffer + offset; + *eof = 1; + + return len; +} + +#endif /* CONFIG_PROC_FS */ + + +#ifdef CONFIG_NET_RADIO +#ifdef CONFIG_PROC_FS + +/* + * Print one entry of /proc/net/wireless + * This is a clone of /proc/net/dev (just above) + */ +static int sprintf_wireless_stats(char *buffer, struct device *dev) +{ + /* Get stats from the driver */ + struct iw_statistics *stats = (dev->get_wireless_stats ? + dev->get_wireless_stats(dev) : + (struct iw_statistics *) NULL); + int size; + + if(stats != (struct iw_statistics *) NULL) + size = sprintf(buffer, + "%6s: %02x %3d%c %3d%c %3d%c %5d %5d %5d\n", + dev->name, + stats->status, + stats->qual.qual, + stats->qual.updated & 1 ? '.' : ' ', + stats->qual.level, + stats->qual.updated & 2 ? '.' : ' ', + stats->qual.noise, + stats->qual.updated & 3 ? '.' 
: ' ', + stats->discard.nwid, + stats->discard.code, + stats->discard.misc); + else + size = 0; + + return size; +} + +/* + * Print info for /proc/net/wireless (print all entries) + * This is a clone of /proc/net/dev (just above) + */ +int dev_get_wireless_info(char * buffer, char **start, off_t offset, + int length, int dummy) +{ + int len = 0; + off_t begin = 0; + off_t pos = 0; + int size; + + struct device * dev; + + size = sprintf(buffer, + "Inter-|sta| Quality | Discarded packets\n" + " face |tus|link level noise| nwid crypt misc\n"); + + pos+=size; + len+=size; + + for(dev = dev_base; dev != NULL; dev = dev->next) + { + size = sprintf_wireless_stats(buffer+len, dev); + len+=size; + pos=begin+len; + + if(pos < offset) + { + len=0; + begin=pos; + } + if(pos > offset + length) + break; + } + + *start = buffer + (offset - begin); /* Start of wanted data */ + len -= (offset - begin); /* Start slop */ + if(len > length) + len = length; /* Ending slop */ + + return len; +} +#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NET_RADIO */ + +void dev_set_promiscuity(struct device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + dev->flags |= IFF_PROMISC; + if ((dev->promiscuity += inc) == 0) + dev->flags &= ~IFF_PROMISC; + if (dev->flags^old_flags) { +#ifdef CONFIG_NET_FASTROUTE + if (dev->flags&IFF_PROMISC) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(dev); + } else + netdev_fastroute_obstacles--; +#endif + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", + dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); + } +} + +void dev_set_allmulti(struct device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + dev->flags |= IFF_ALLMULTI; + if ((dev->allmulti += inc) == 0) + dev->flags &= ~IFF_ALLMULTI; + if (dev->flags^old_flags) + dev_mc_upload(dev); +} + +int dev_change_flags(struct device *dev, unsigned flags) +{ + int ret; + int old_flags = dev->flags; + + /* + * Set the flags on our device. + */ + + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| + IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC| + IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | + (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); + + /* + * Load in the correct multicast list now the flags have changed. + */ + + dev_mc_upload(dev); + + /* + * Have we downed the interface. We handle IFF_UP ourselves + * according to user attempts to set it, rather than blindly + * setting it. + */ + + ret = 0; + if ((old_flags^flags)&IFF_UP) /* Bit is different ? */ + { + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); + + if (ret == 0) + dev_mc_upload(dev); + } + + if (dev->flags&IFF_UP && + ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + + if ((flags^dev->gflags)&IFF_PROMISC) { + int inc = (flags&IFF_PROMISC) ? +1 : -1; + dev->gflags ^= IFF_PROMISC; + dev_set_promiscuity(dev, inc); + } + + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI + is important. Some (broken) drivers set IFF_PROMISC, when + IFF_ALLMULTI is requested not asking us and not reporting. + */ + if ((flags^dev->gflags)&IFF_ALLMULTI) { + int inc = (flags&IFF_ALLMULTI) ? +1 : -1; + dev->gflags ^= IFF_ALLMULTI; + dev_set_allmulti(dev, inc); + } + + return ret; +} + +/* + * Perform the SIOCxIFxxx calls. 
+ */ + +static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +{ + struct device *dev; + int err; + + if ((dev = dev_get(ifr->ifr_name)) == NULL) + return -ENODEV; + + switch(cmd) + { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI)) + |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + return 0; + + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ + ifr->ifr_metric = 0; + return 0; + + case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ + return -EOPNOTSUPP; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; + + case SIOCSIFMTU: /* Set the MTU of a device */ + if (ifr->ifr_mtu == dev->mtu) + return 0; + + /* + * MTU must be positive. + */ + + if (ifr->ifr_mtu<=0) + return -EINVAL; + + if (dev->change_mtu) + err = dev->change_mtu(dev, ifr->ifr_mtu); + else { + dev->mtu = ifr->ifr_mtu; + err = 0; + } + if (!err && dev->flags&IFF_UP) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); + return err; + + case SIOCGIFHWADDR: + memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); + ifr->ifr_hwaddr.sa_family=dev->type; + return 0; + + case SIOCSIFHWADDR: + if(dev->set_mac_address==NULL) + return -EOPNOTSUPP; + if(ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + err=dev->set_mac_address(dev,&ifr->ifr_hwaddr); + if (!err) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return err; + + case SIOCSIFHWBROADCAST: + if(ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start=dev->mem_start; + ifr->ifr_map.mem_end=dev->mem_end; + ifr->ifr_map.base_addr=dev->base_addr; + ifr->ifr_map.irq=dev->irq; + ifr->ifr_map.dma=dev->dma; + ifr->ifr_map.port=dev->if_port; + return 0; + + case SIOCSIFMAP: + if (dev->set_config) + return dev->set_config(dev,&ifr->ifr_map); + return -EOPNOTSUPP; + + case SIOCADDMULTI: + if(dev->set_multicast_list==NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); + return 0; + + case SIOCDELMULTI: + if(dev->set_multicast_list==NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + case SIOCSIFTXQLEN: + if(ifr->ifr_qlen<0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + + case SIOCSIFNAME: + if (dev->flags&IFF_UP) + return -EBUSY; + if (dev_get(ifr->ifr_newname)) + return -EEXIST; + memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); + dev->name[IFNAMSIZ-1] = 0; + notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + return 0; + + /* + * Unknown or private ioctl + */ + + default: + if(cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + if (dev->do_ioctl) + return dev->do_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; + } + +#ifdef CONFIG_NET_RADIO + if(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + if (dev->do_ioctl) + return dev->do_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; + } +#endif /* CONFIG_NET_RADIO */ + + } + return -EINVAL; +} + + +/* + * This function handles all "interface"-type I/O 
control requests. The actual + * 'doing' part of this is dev_ifsioc above. + */ + +int dev_ioctl(unsigned int cmd, void *arg) +{ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ + + if (cmd == SIOCGIFCONF) { + rtnl_shlock(); + ret = dev_ifconf((char *) arg); + rtnl_shunlock(); + return ret; + } + if (cmd == SIOCGIFNAME) { + return dev_ifname((struct ifreq *)arg); + } + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + + /* + * See which interface the caller is talking about. + */ + + switch(cmd) + { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ + + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(ifr.ifr_name); + ret = dev_ifsioc(&ifr, cmd); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSIFTXQLEN: + case SIOCSIFNAME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but currently + do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. Not applicable in our case */ + case SIOCSIFLINK: + return -EINVAL; + + /* + * Unknown or private ioctl. + */ + + default: + if (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + dev_load(ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; + } +#ifdef CONFIG_NET_RADIO + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + dev_load(ifr.ifr_name); + if (IW_IS_SET(cmd)) { + if (!suser()) + return -EPERM; + rtnl_lock(); + } + ret = dev_ifsioc(&ifr, cmd); + if (IW_IS_SET(cmd)) + rtnl_unlock(); + if (!ret && IW_IS_GET(cmd) && + copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; + } +#endif /* CONFIG_NET_RADIO */ + return -EINVAL; + } +} + +int dev_new_index(void) +{ + static int ifindex; + for (;;) { + if (++ifindex <= 0) + ifindex=1; + if (dev_get_by_index(ifindex) == NULL) + return ifindex; + } +} + +static int dev_boot_phase = 1; + + +int register_netdevice(struct device *dev) +{ + struct device *d, **dp; + + if (dev_boot_phase) { + /* This is NOT bug, but I am not sure, that all the + devices, initialized before netdev module is started + are sane. + + Now they are chained to device boot list + and probed later. If a module is initialized + before netdev, but assumes that dev->init + is really called by register_netdev(), it will fail. + + So that this message should be printed for a while. 
+ */ + printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name); + + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) + return -EEXIST; + } + dev->next = NULL; + *dp = dev; + return 0; + } + + dev->iflink = -1; + + /* Init, if this function is available */ + if (dev->init && dev->init(dev) != 0) + return -EIO; + + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) + return -EEXIST; + } + dev->next = NULL; + dev_init_scheduler(dev); + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + *dp = dev; + + /* Notify protocols, that a new device appeared. */ + notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + + return 0; +} + +int unregister_netdevice(struct device *dev) +{ + struct device *d, **dp; + + if (dev_boot_phase == 0) { + /* If device is running, close it. + It is very bad idea, really we should + complain loudly here, but random hackery + in linux/drivers/net likes it. + */ + if (dev->flags & IFF_UP) + dev_close(dev); + +#ifdef CONFIG_NET_FASTROUTE + dev_clear_fastroute(dev); +#endif + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + + /* To avoid pointers looking to nowhere, + we wait for end of critical section */ + dev_lock_wait(); + } + + /* And unlink it from device chain. */ + for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev) { + *dp = d->next; + synchronize_bh(); + d->next = NULL; + + if (dev->destructor) + dev->destructor(dev); + return 0; + } + } + return -ENODEV; +} + + +/* + * Initialize the DEV module. At boot time this walks the device list and + * unhooks any devices that fail to initialise (normally hardware not + * present) and leaves us with a valid list of present and active devices. + * + */ +extern int lance_init(void); +extern int bpq_init(void); +extern int scc_init(void); +extern void sdla_setup(void); +extern void dlci_setup(void); +extern int dmascc_init(void); +extern int sm_init(void); + +extern int baycom_ser_fdx_init(void); +extern int baycom_ser_hdx_init(void); +extern int baycom_par_init(void); + +extern int lapbeth_init(void); +extern void arcnet_init(void); +extern void ip_auto_config(void); +#ifdef CONFIG_8xx +extern int cpm_enet_init(void); +#endif /* CONFIG_8xx */ + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_dev = { + PROC_NET_DEV, 3, "dev", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + dev_get_info +}; +#endif + +#ifdef CONFIG_NET_RADIO +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_wireless = { + PROC_NET_WIRELESS, 8, "wireless", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + dev_get_wireless_info +}; +#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NET_RADIO */ + +__initfunc(int net_dev_init(void)) +{ + struct device *dev, **dp; + +#ifdef CONFIG_NET_SCHED + pktsched_init(); +#endif + + /* + * Initialise the packet receive queue. + */ + + skb_queue_head_init(&backlog); + + /* + * The bridge has to be up before the devices + */ + +#ifdef CONFIG_BRIDGE + br_init(); +#endif + + /* + * This is Very Ugly(tm). 
+ * + * Some devices want to be initialized early.. + */ + +#if defined(CONFIG_SCC) + scc_init(); +#endif +#if defined(CONFIG_DMASCC) + dmascc_init(); +#endif +#if defined(CONFIG_BPQETHER) + bpq_init(); +#endif +#if defined(CONFIG_DLCI) + dlci_setup(); +#endif +#if defined(CONFIG_SDLA) + sdla_setup(); +#endif +#if defined(CONFIG_BAYCOM_PAR) + baycom_par_init(); +#endif +#if defined(CONFIG_BAYCOM_SER_FDX) + baycom_ser_fdx_init(); +#endif +#if defined(CONFIG_BAYCOM_SER_HDX) + baycom_ser_hdx_init(); +#endif +#if defined(CONFIG_SOUNDMODEM) + sm_init(); +#endif +#if defined(CONFIG_LAPBETHER) + lapbeth_init(); +#endif +#if defined(CONFIG_PLIP) + plip_init(); +#endif +#if defined(CONFIG_ARCNET) + arcnet_init(); +#endif +#if defined(CONFIG_8xx) + cpm_enet_init(); +#endif + /* + * SLHC if present needs attaching so other people see it + * even if not opened. + */ + +#ifdef CONFIG_INET +#if (defined(CONFIG_SLIP) && defined(CONFIG_SLIP_COMPRESSED)) \ + || defined(CONFIG_PPP) \ + || (defined(CONFIG_ISDN) && defined(CONFIG_ISDN_PPP)) + slhc_install(); +#endif +#endif + +#ifdef CONFIG_NET_PROFILE + net_profile_init(); + NET_PROFILE_REGISTER(dev_queue_xmit); + NET_PROFILE_REGISTER(net_bh); +#if 0 + NET_PROFILE_REGISTER(net_bh_skb); +#endif +#endif + /* + * Add the devices. + * If the call to dev->init fails, the dev is removed + * from the chain disconnecting the device until the + * next reboot. + */ + + dp = &dev_base; + while ((dev = *dp) != NULL) + { + dev->iflink = -1; + if (dev->init && dev->init(dev)) + { + /* + * It failed to come up. Unhook it. + */ + *dp = dev->next; + synchronize_bh(); + } + else + { + dp = &dev->next; + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + dev_init_scheduler(dev); + } + } + +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_dev); + { + struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0); + ent->read_proc = dev_proc_stats; + } +#endif + +#ifdef CONFIG_NET_RADIO +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_wireless); +#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NET_RADIO */ + + init_bh(NET_BH, net_bh); + + dev_boot_phase = 0; + + dev_mcast_init(); + +#ifdef CONFIG_IP_PNP + ip_auto_config(); +#endif + + return 0; +} diff --git a/pfinet/linux-src/net/core/dev_mcast.c b/pfinet/linux-src/net/core/dev_mcast.c new file mode 100644 index 00000000..bce3f4a4 --- /dev/null +++ b/pfinet/linux-src/net/core/dev_mcast.c @@ -0,0 +1,252 @@ +/* + * Linux NET3: Multicast List maintenance. + * + * Authors: + * Tim Kordas <tjk@nostromo.eeap.cwru.edu> + * Richard Underwood <richard@wuzz.demon.co.uk> + * + * Stir fried together from the IP multicast and CAP patches above + * Alan Cox <Alan.Cox@linux.org> + * + * Fixes: + * Alan Cox : Update the device on a real delete + * rather than any time but... + * Alan Cox : IFF_ALLMULTI support. + * Alan Cox : New format set_multicast_list() calls. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <net/ip.h> +#include <net/route.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> + + +/* + * Device multicast list maintenance. + * + * This is used both by IP and by the user level maintenance functions. + * Unlike BSD we maintain a usage count on a given multicast address so + * that a casual user application can add/delete multicasts used by + * protocols without doing damage to the protocols when it deletes the + * entries. It also helps IP as it tracks overlapping maps. + * + * Device mc lists are changed by bh at least if IPv6 is enabled, + * so that it must be bh protected. + */ + +/* + * Update the multicast list into the physical NIC controller. + */ + +void dev_mc_upload(struct device *dev) +{ + /* Don't do anything till we up the interface + [dev_open will call this function so the list will + stay sane] */ + + if(!(dev->flags&IFF_UP)) + return; + + /* + * Devices with no set multicast don't get set + */ + + if(dev->set_multicast_list==NULL) + return; + + start_bh_atomic(); + dev->set_multicast_list(dev); + end_bh_atomic(); +} + +/* + * Delete a device level multicast + */ + +int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl) +{ + int err = 0; + struct dev_mc_list *dmi, **dmip; + + start_bh_atomic(); + for (dmip=&dev->mc_list; (dmi=*dmip)!=NULL; dmip=&dmi->next) { + /* + * Find the entry we want to delete. The device could + * have variable length entries so check these too. + */ + if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && alen==dmi->dmi_addrlen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 0; + if (old_glbl == 0) + break; + } + if(--dmi->dmi_users) + goto done; + + /* + * Last user. So delete the entry. + */ + *dmip = dmi->next; + dev->mc_count--; + kfree_s(dmi,sizeof(*dmi)); + /* + * We have altered the list, so the card + * loaded filter is now wrong. Fix it + */ + end_bh_atomic(); + dev_mc_upload(dev); + return 0; + } + } + err = -ENOENT; +done: + end_bh_atomic(); + return err; +} + +/* + * Add a device level multicast + */ + +int dev_mc_add(struct device *dev, void *addr, int alen, int glbl) +{ + int err = 0; + struct dev_mc_list *dmi, *dmi1; + + dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), gfp_any()); + + start_bh_atomic(); + for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) { + if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 1; + if (old_glbl) + goto done; + } + dmi->dmi_users++; + goto done; + } + } + + if ((dmi=dmi1)==NULL) + return -ENOMEM; + memcpy(dmi->dmi_addr, addr, alen); + dmi->dmi_addrlen=alen; + dmi->next=dev->mc_list; + dmi->dmi_users=1; + dmi->dmi_gusers=glbl ? 
1 : 0; + dev->mc_list=dmi; + dev->mc_count++; + end_bh_atomic(); + dev_mc_upload(dev); + return 0; + +done: + end_bh_atomic(); + if (dmi1) + kfree(dmi1); + return err; +} + +/* + * Discard multicast list when a device is downed + */ + +void dev_mc_discard(struct device *dev) +{ + start_bh_atomic(); + while (dev->mc_list!=NULL) { + struct dev_mc_list *tmp=dev->mc_list; + dev->mc_list=tmp->next; + if (tmp->dmi_users > tmp->dmi_gusers) + printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); + kfree_s(tmp,sizeof(*tmp)); + } + dev->mc_count=0; + end_bh_atomic(); +} + +#ifdef CONFIG_PROC_FS +static int dev_mc_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0, begin=0; + struct dev_mc_list *m; + int len=0; + struct device *dev; + + start_bh_atomic(); + + for (dev = dev_base; dev; dev = dev->next) { + for (m = dev->mc_list; m; m = m->next) { + int i; + + len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, dev->name, + m->dmi_users, m->dmi_gusers); + + for (i=0; i<m->dmi_addrlen; i++) + len += sprintf(buffer+len, "%02x", m->dmi_addr[i]); + + len+=sprintf(buffer+len, "\n"); + + pos=begin+len; + if (pos < offset) { + len=0; + begin=pos; + } + if (pos > offset+length) + goto done; + } + } + *eof = 1; + +done: + end_bh_atomic(); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if(len<0) + len=0; + return len; +} +#endif + +__initfunc(void dev_mcast_init(void)) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; + + ent = create_proc_entry("net/dev_mcast", 0, 0); + ent->read_proc = dev_mc_read_proc; +#endif +} + diff --git a/pfinet/linux-src/net/core/dst.c b/pfinet/linux-src/net/core/dst.c new file mode 100644 index 00000000..9007dde6 --- /dev/null +++ b/pfinet/linux-src/net/core/dst.c @@ -0,0 +1,145 @@ +/* + * net/dst.c Protocol independent destination cache. 
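The list maintenance above is reference counted: adding an address that is already present only bumps dmi_users, and the hardware filter is only reloaded when an entry really appears or disappears. A hedged sketch of a hypothetical in-kernel caller, using only the dev_mc_add()/dev_mc_delete() signatures defined above (the multicast address and helper names are illustrative):

/* Hypothetical caller joining and leaving an Ethernet multicast group.
 * A second add of the same address only bumps dmi_users; the NIC filter
 * is reprogrammed (dev_mc_upload) only when the list really changes.
 */
static unsigned char all_hosts[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

static void example_join(struct device *dev)
{
	dev_mc_add(dev, all_hosts, ETH_ALEN, 0);	/* new entry, dmi_users == 1 */
	dev_mc_add(dev, all_hosts, ETH_ALEN, 0);	/* same entry, dmi_users == 2 */
}

static void example_leave(struct device *dev)
{
	dev_mc_delete(dev, all_hosts, ETH_ALEN, 0);	/* dmi_users drops to 1, entry kept */
	dev_mc_delete(dev, all_hosts, ETH_ALEN, 0);	/* last user: entry freed, filter reloaded */
}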
+ * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include <net/dst.h> + +struct dst_entry * dst_garbage_list; +atomic_t dst_total = ATOMIC_INIT(0); + +static unsigned long dst_gc_timer_expires; +static unsigned long dst_gc_timer_inc = DST_GC_MAX; +static void dst_run_gc(unsigned long); + +static struct timer_list dst_gc_timer = + { NULL, NULL, DST_GC_MIN, 0L, dst_run_gc }; + +#if RT_CACHE_DEBUG >= 2 +atomic_t hh_count; +#endif + +static void dst_run_gc(unsigned long dummy) +{ + int delayed = 0; + struct dst_entry * dst, **dstp; + + del_timer(&dst_gc_timer); + dstp = &dst_garbage_list; + while ((dst = *dstp) != NULL) { + if (atomic_read(&dst->use)) { + dstp = &dst->next; + delayed++; + continue; + } + *dstp = dst->next; + dst_destroy(dst); + } + if (!dst_garbage_list) { + dst_gc_timer_inc = DST_GC_MAX; + return; + } + if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) + dst_gc_timer_expires = DST_GC_MAX; + dst_gc_timer_inc += DST_GC_INC; + dst_gc_timer.expires = jiffies + dst_gc_timer_expires; +#if RT_CACHE_DEBUG >= 2 + printk("dst_total: %d/%d %ld\n", + atomic_read(&dst_total), delayed, dst_gc_timer_expires); +#endif + add_timer(&dst_gc_timer); +} + +static int dst_discard(struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int dst_blackhole(struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +void * dst_alloc(int size, struct dst_ops * ops) +{ + struct dst_entry * dst; + + if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { + if (ops->gc()) + return NULL; + } + dst = kmalloc(size, GFP_ATOMIC); + if (!dst) + return NULL; + memset(dst, 0, size); + dst->ops = ops; + atomic_set(&dst->refcnt, 0); + dst->lastuse = jiffies; + dst->input = dst_discard; + dst->output = dst_blackhole; + atomic_inc(&dst_total); + atomic_inc(&ops->entries); + return dst; +} + +void __dst_free(struct dst_entry * dst) +{ + start_bh_atomic(); + /* The first case (dev==NULL) is required, when + protocol module is unloaded. 
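The collection pass above backs off when it keeps finding entries that are still referenced, and __dst_free() snaps the timer back to its minimum whenever fresh garbage is queued. A standalone sketch of that arithmetic; GC_MIN/GC_INC/GC_MAX are placeholders, not the DST_GC_* values from <net/dst.h>:

/* Standalone model of the garbage-collection back-off in dst_run_gc():
 * every pass that still finds referenced entries lengthens the next
 * interval, and __dst_free() restarts from the minimum when new garbage
 * arrives.
 */
#include <stdio.h>

#define GC_MIN   1
#define GC_INC   5
#define GC_MAX 120

int main(void)
{
	unsigned long expires, inc;
	int pass;

	/* __dst_free() queues the first entry: restart from the minimum. */
	inc = GC_INC;
	expires = GC_MIN;

	for (pass = 1; pass <= 6; pass++) {
		printf("pass %d: next gc in %lu ticks\n", pass, expires);
		/* dst_run_gc() with work left over: grow the interval. */
		if ((expires += inc) > GC_MAX)
			expires = GC_MAX;
		inc += GC_INC;
	}
	return 0;
}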
+ */ + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + dst->input = dst_discard; + dst->output = dst_blackhole; + dst->dev = &loopback_dev; + } + dst->obsolete = 2; + dst->next = dst_garbage_list; + dst_garbage_list = dst; + if (dst_gc_timer_inc > DST_GC_INC) { + del_timer(&dst_gc_timer); + dst_gc_timer_inc = DST_GC_INC; + dst_gc_timer_expires = DST_GC_MIN; + dst_gc_timer.expires = jiffies + dst_gc_timer_expires; + add_timer(&dst_gc_timer); + } + end_bh_atomic(); +} + +void dst_destroy(struct dst_entry * dst) +{ + struct neighbour *neigh = dst->neighbour; + struct hh_cache *hh = dst->hh; + + dst->hh = NULL; + if (hh && atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); + + if (neigh) { + dst->neighbour = NULL; + neigh_release(neigh); + } + + atomic_dec(&dst->ops->entries); + + if (dst->ops->destroy) + dst->ops->destroy(dst); + atomic_dec(&dst_total); + kfree(dst); +} diff --git a/pfinet/linux-src/net/core/filter.c b/pfinet/linux-src/net/core/filter.c new file mode 100644 index 00000000..8e1ffb62 --- /dev/null +++ b/pfinet/linux-src/net/core/filter.c @@ -0,0 +1,454 @@ +/* + * Linux Socket Filter - Kernel level socket filtering + * + * Author: + * Jay Schulist <Jay.Schulist@spacs.k12.wi.us> + * + * Based on the design of: + * - The Berkeley Packet Filter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Andi Kleen - Fix a few bad bugs and races. + */ + +#include <linux/config.h> +#if defined(CONFIG_FILTER) + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_packet.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/filter.h> + +/* No hurry in this branch */ + +static u8 *load_pointer(struct sk_buff *skb, int k) +{ + u8 *ptr = NULL; + + if (k>=SKF_NET_OFF) + ptr = skb->nh.raw + k - SKF_NET_OFF; + else if (k>=SKF_LL_OFF) + ptr = skb->mac.raw + k - SKF_LL_OFF; + + if (ptr<skb->head && ptr < skb->tail) + return ptr; + return NULL; +} + +/* + * Decode and apply filter instructions to the skb->data. + * Return length to keep, 0 for none. skb is the data we are + * filtering, filter is the array of filter instructions, and + * len is the number of filter blocks in the array. + */ + +int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +{ + unsigned char *data = skb->data; + /* len is UNSIGNED. Byte wide insns relies only on implicit + type casts to prevent reading arbitrary memory locations. + */ + unsigned int len = skb->len; + struct sock_filter *fentry; /* We walk down these */ + u32 A = 0; /* Accumulator */ + u32 X = 0; /* Index Register */ + u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ + int k; + int pc; + + /* + * Process array of filter instructions. 
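For orientation, this is the shape of program the interpreter below executes: a classic four-instruction filter that keeps IPv4 frames and drops everything else. BPF_STMT/BPF_JUMP are the standard helpers from <linux/filter.h>; the offset 12 assumes the filter sees an Ethernet header at the start of the data:

#include <linux/filter.h>

static struct sock_filter ipv4_only[] = {
	/* A = half word at byte 12 (the EtherType field)  */
	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
	/* if (A == ETH_P_IP) fall through, else skip one  */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 1),
	/* accept: keep up to 64K of the packet            */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),
	/* reject: keep nothing                            */
	BPF_STMT(BPF_RET | BPF_K, 0),
};

sk_chk_filter() further down accepts a program like this: every jump is forward and the final instruction is a BPF_RET.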
+ */ + + for(pc = 0; pc < flen; pc++) + { + fentry = &filter[pc]; + + switch(fentry->code) + { + case BPF_ALU|BPF_ADD|BPF_X: + A += X; + continue; + + case BPF_ALU|BPF_ADD|BPF_K: + A += fentry->k; + continue; + + case BPF_ALU|BPF_SUB|BPF_X: + A -= X; + continue; + + case BPF_ALU|BPF_SUB|BPF_K: + A -= fentry->k; + continue; + + case BPF_ALU|BPF_MUL|BPF_X: + A *= X; + continue; + + case BPF_ALU|BPF_MUL|BPF_K: + A *= fentry->k; + continue; + + case BPF_ALU|BPF_DIV|BPF_X: + if(X == 0) + return (0); + A /= X; + continue; + + case BPF_ALU|BPF_DIV|BPF_K: + if(fentry->k == 0) + return (0); + A /= fentry->k; + continue; + + case BPF_ALU|BPF_AND|BPF_X: + A &= X; + continue; + + case BPF_ALU|BPF_AND|BPF_K: + A &= fentry->k; + continue; + + case BPF_ALU|BPF_OR|BPF_X: + A |= X; + continue; + + case BPF_ALU|BPF_OR|BPF_K: + A |= fentry->k; + continue; + + case BPF_ALU|BPF_LSH|BPF_X: + A <<= X; + continue; + + case BPF_ALU|BPF_LSH|BPF_K: + A <<= fentry->k; + continue; + + case BPF_ALU|BPF_RSH|BPF_X: + A >>= X; + continue; + + case BPF_ALU|BPF_RSH|BPF_K: + A >>= fentry->k; + continue; + + case BPF_ALU|BPF_NEG: + A = -A; + continue; + + case BPF_JMP|BPF_JA: + pc += fentry->k; + continue; + + case BPF_JMP|BPF_JGT|BPF_K: + pc += (A > fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGE|BPF_K: + pc += (A >= fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JEQ|BPF_K: + pc += (A == fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JSET|BPF_K: + pc += (A & fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGT|BPF_X: + pc += (A > X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGE|BPF_X: + pc += (A >= X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JEQ|BPF_X: + pc += (A == X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JSET|BPF_X: + pc += (A & X) ? 
fentry->jt : fentry->jf; + continue; + + case BPF_LD|BPF_W|BPF_ABS: + k = fentry->k; +load_w: + if(k+sizeof(u32) <= len) { + A = ntohl(*(u32*)&data[k]); + continue; + } + if (k<0) { + u8 *ptr; + + if (k>=SKF_AD_OFF) + break; + if ((ptr = load_pointer(skb, k)) != NULL) { + A = ntohl(*(u32*)ptr); + continue; + } + } + return 0; + + case BPF_LD|BPF_H|BPF_ABS: + k = fentry->k; +load_h: + if(k + sizeof(u16) <= len) { + A = ntohs(*(u16*)&data[k]); + continue; + } + if (k<0) { + u8 *ptr; + + if (k>=SKF_AD_OFF) + break; + if ((ptr = load_pointer(skb, k)) != NULL) { + A = ntohs(*(u16*)ptr); + continue; + } + } + return 0; + + case BPF_LD|BPF_B|BPF_ABS: + k = fentry->k; +load_b: + if(k < len) { + A = data[k]; + continue; + } + if (k<0) { + u8 *ptr; + + if (k>=SKF_AD_OFF) + break; + if ((ptr = load_pointer(skb, k)) != NULL) { + A = *ptr; + continue; + } + } + + case BPF_LD|BPF_W|BPF_LEN: + A = len; + continue; + + case BPF_LDX|BPF_W|BPF_LEN: + X = len; + continue; + + case BPF_LD|BPF_W|BPF_IND: + k = X + fentry->k; + goto load_w; + + case BPF_LD|BPF_H|BPF_IND: + k = X + fentry->k; + goto load_h; + + case BPF_LD|BPF_B|BPF_IND: + k = X + fentry->k; + goto load_b; + + case BPF_LDX|BPF_B|BPF_MSH: + k = fentry->k; + if(k >= len) + return (0); + X = (data[k] & 0xf) << 2; + continue; + + case BPF_LD|BPF_IMM: + A = fentry->k; + continue; + + case BPF_LDX|BPF_IMM: + X = fentry->k; + continue; + + case BPF_LD|BPF_MEM: + A = mem[fentry->k]; + continue; + + case BPF_LDX|BPF_MEM: + X = mem[fentry->k]; + continue; + + case BPF_MISC|BPF_TAX: + X = A; + continue; + + case BPF_MISC|BPF_TXA: + A = X; + continue; + + case BPF_RET|BPF_K: + return ((unsigned int)fentry->k); + + case BPF_RET|BPF_A: + return ((unsigned int)A); + + case BPF_ST: + mem[fentry->k] = A; + continue; + + case BPF_STX: + mem[fentry->k] = X; + continue; + + default: + /* Invalid instruction counts as RET */ + return (0); + } + + /* Handle ancillary data, which are impossible + (or very difficult) to get parsing packet contents. + */ + switch (k-SKF_AD_OFF) { + case SKF_AD_PROTOCOL: + A = htons(skb->protocol); + continue; + case SKF_AD_PKTTYPE: + A = skb->pkt_type; + continue; + case SKF_AD_IFINDEX: + A = skb->dev->ifindex; + continue; + default: + return 0; + } + } + + return (0); +} + +/* + * Check the user's filter code. If we let some ugly + * filter code slip through kaboom! + */ + +int sk_chk_filter(struct sock_filter *filter, int flen) +{ + struct sock_filter *ftest; + int pc; + + /* + * Check the filter code now. + */ + for(pc = 0; pc < flen; pc++) + { + /* + * All jumps are forward as they are not signed + */ + + ftest = &filter[pc]; + if(BPF_CLASS(ftest->code) == BPF_JMP) + { + /* + * But they mustn't jump off the end. + */ + if(BPF_OP(ftest->code) == BPF_JA) + { + /* Note, the large ftest->k might cause + loops. Compare this with conditional + jumps below, where offsets are limited. --ANK (981016) + */ + if (ftest->k >= (unsigned)(flen-pc-1)) + return (-EINVAL); + } + else + { + /* + * For conditionals both must be safe + */ + if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen) + return (-EINVAL); + } + } + + /* + * Check that memory operations use valid addresses. + */ + + if (ftest->k >= BPF_MEMWORDS) + { + /* + * But it might not be a memory operation... + */ + switch (ftest->code) { + case BPF_ST: + case BPF_STX: + case BPF_LD|BPF_MEM: + case BPF_LDX|BPF_MEM: + return -EINVAL; + } + } + } + + /* + * The program must end with a return. 
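Two small programs the checks above reject, for illustration (same helper macros as before):

/* Rejected: a conditional jump that can land past the end of the program. */
static struct sock_filter bad_jump[] = {
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5),	/* jf = 5, but only 2 insns */
	BPF_STMT(BPF_RET | BPF_K, 0),
};

/* Rejected: does not end with a BPF_RET instruction. */
static struct sock_filter no_ret[] = {
	BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),
	BPF_STMT(BPF_MISC | BPF_TAX, 0),
};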
We don't care where they + * jumped within the script (its always forwards) but in the + * end they _will_ hit this. + */ + + return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL; +} + +/* + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. + */ + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct sk_filter *fp; + unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + int err; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS) + return (-EINVAL); + + fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); + if(fp == NULL) + return (-ENOMEM); + + if (copy_from_user(fp->insns, fprog->filter, fsize)) { + sock_kfree_s(sk, fp, fsize+sizeof(*fp)); + return -EFAULT; + } + + atomic_set(&fp->refcnt, 1); + fp->len = fprog->len; + + if ((err = sk_chk_filter(fp->insns, fp->len))==0) { + struct sk_filter *old_fp = sk->filter; + sk->filter = fp; + synchronize_bh(); + fp = old_fp; + } + + if (fp) + sk_filter_release(sk, fp); + + return (err); +} +#endif /* CONFIG_FILTER */ diff --git a/pfinet/linux-src/net/core/firewall.c b/pfinet/linux-src/net/core/firewall.c new file mode 100644 index 00000000..fc7b1a51 --- /dev/null +++ b/pfinet/linux-src/net/core/firewall.c @@ -0,0 +1,160 @@ +/* + * Generic loadable firewalls. At the moment only IP will actually + * use these, but people can add the others as they are needed. + * + * Authors: Dave Bonn (for IP) + * much hacked by: Alan Cox + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/firewall.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <asm/semaphore.h> + +struct semaphore firewall_sem = MUTEX; +static int firewall_policy[NPROTO]; +static struct firewall_ops *firewall_chain[NPROTO]; + +/* + * Register a firewall + */ + +int register_firewall(int pf, struct firewall_ops *fw) +{ + struct firewall_ops **p; + + if(pf<0||pf>=NPROTO) + return -EINVAL; + + /* + * Don't allow two people to adjust at once. + */ + + down(&firewall_sem); + + p=&firewall_chain[pf]; + + while(*p) + { + if(fw->fw_priority > (*p)->fw_priority) + break; + p=&((*p)->next); + } + + /* + * We need to use a memory barrier to make sure that this + * works correctly even in SMP with weakly ordered writes. + * + * This is atomic wrt interrupts (and generally walking the + * chain), but not wrt itself (so you can't call this from + * an interrupt. Not that you'd want to). + */ + + fw->next=*p; + mb(); + *p = fw; + + /* + * And release the sleep lock + */ + + up(&firewall_sem); + return 0; +} + +/* + * Unregister a firewall + */ + +int unregister_firewall(int pf, struct firewall_ops *fw) +{ + struct firewall_ops **nl; + + if(pf<0||pf>=NPROTO) + return -EINVAL; + + /* + * Don't allow two people to adjust at once. 
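sk_attach_filter() is normally reached from setsockopt(SO_ATTACH_FILTER). A minimal user-space sketch of the attach, with error handling omitted:

/* User-space side: build a sock_fprog and hand it to the socket.
 * SO_ATTACH_FILTER ends up in sk_attach_filter() above, which copies
 * the instructions, runs sk_chk_filter() and swaps sk->filter.
 */
#include <sys/socket.h>
#include <linux/filter.h>

static struct sock_filter accept_all[] = {
	BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* keep up to 64K of every packet */
};

int attach_accept_all(int fd)
{
	struct sock_fprog prog = { 1, accept_all };	/* len, filter */

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}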
+ */ + + down(&firewall_sem); + + nl=&firewall_chain[pf]; + + while(*nl!=NULL) + { + if(*nl==fw) + { + struct firewall_ops *f=fw->next; + *nl = f; + up(&firewall_sem); + synchronize_bh(); + return 0; + } + nl=&((*nl)->next); + } + up(&firewall_sem); + return -ENOENT; +} + +int call_fw_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) +{ + struct firewall_ops *fw=firewall_chain[pf]; + + while(fw!=NULL) + { + int rc=fw->fw_forward(fw,pf,dev,phdr,arg,skb); + if(rc!=FW_SKIP) + return rc; + fw=fw->next; + } + return firewall_policy[pf]; +} + +/* + * Actual invocation of the chains + */ + +int call_in_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) +{ + struct firewall_ops *fw=firewall_chain[pf]; + + while(fw!=NULL) + { + int rc=fw->fw_input(fw,pf,dev,phdr,arg,skb); + if(rc!=FW_SKIP) + return rc; + fw=fw->next; + } + return firewall_policy[pf]; +} + +int call_out_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) +{ + struct firewall_ops *fw=firewall_chain[pf]; + + while(fw!=NULL) + { + int rc=fw->fw_output(fw,pf,dev,phdr,arg,skb); + if(rc!=FW_SKIP) + return rc; + fw=fw->next; + } + /* alan, is this right? */ + return firewall_policy[pf]; +} + +EXPORT_SYMBOL(register_firewall); +EXPORT_SYMBOL(unregister_firewall); +EXPORT_SYMBOL(call_in_firewall); +EXPORT_SYMBOL(call_out_firewall); +EXPORT_SYMBOL(call_fw_firewall); + +__initfunc(void fwchain_init(void)) +{ + int i; + for(i=0;i<NPROTO;i++) + firewall_policy[i]=FW_ACCEPT; +} diff --git a/pfinet/linux-src/net/core/iovec.c b/pfinet/linux-src/net/core/iovec.c new file mode 100644 index 00000000..c20f8530 --- /dev/null +++ b/pfinet/linux-src/net/core/iovec.c @@ -0,0 +1,278 @@ +/* + * iovec manipulation routines. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Andrew Lunn : Errors in iovec copying. + * Pedro Roque : Added memcpy_fromiovecend and + * csum_..._fromiovecend. + * Andi Kleen : fixed error handling for 2.1 + * Alexey Kuznetsov: 2.1 optimisations + * Andi Kleen : Fix csum*fromiovecend for IPv6. + */ + + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <asm/uaccess.h> +#include <asm/byteorder.h> +#include <net/checksum.h> + +/* + * Verify iovec. The caller must ensure that the iovec is big enough + * to hold the message iovec. + * + * Save time not doing verify_area. copy_*_user will make this work + * in any case. + */ + +int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) +{ + int size, err, ct; + + if(m->msg_namelen) + { + if(mode==VERIFY_READ) + { + err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address); + if(err<0) + goto out; + } + + m->msg_name = address; + } else + m->msg_name = NULL; + + err = -EFAULT; + size = m->msg_iovlen * sizeof(struct iovec); + if (copy_from_user(iov, m->msg_iov, size)) + goto out; + m->msg_iov=iov; + + for (err = 0, ct = 0; ct < m->msg_iovlen; ct++) { + err += iov[ct].iov_len; + /* Goal is not to verify user data, but to prevent returning + negative value, which is interpreted as errno. + Overflow is still possible, but it is harmless. 
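The return convention is easy to miss: on success verify_iovec() returns the total byte count described by the vector, and the loop above only guards against that signed sum wrapping negative, which a caller would misread as an errno. A standalone model of the accounting:

/* Standalone model of the length accounting in verify_iovec(): sum the
 * segment lengths into a signed int and fail with EMSGSIZE if the sum
 * wraps negative, as the loop above does.
 */
#include <stdio.h>
#include <sys/uio.h>
#include <errno.h>

static int total_len(const struct iovec *iov, int iovlen)
{
	int len = 0, i;

	for (i = 0; i < iovlen; i++) {
		len += iov[i].iov_len;
		if (len < 0)
			return -EMSGSIZE;	/* overflowed into the errno range */
	}
	return len;
}

int main(void)
{
	char a[3], b[5];
	struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

	printf("%d bytes\n", total_len(iov, 2));	/* prints "8 bytes" */
	return 0;
}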
+ */ + if (err < 0) + return -EMSGSIZE; + } +out: + return err; +} + +/* + * Copy kernel to iovec. Returns -EFAULT on error. + * + * Note: this modifies the original iovec. + */ + +int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) +{ + int err = -EFAULT; + + while(len>0) + { + if(iov->iov_len) + { + int copy = min(iov->iov_len, len); + if (copy_to_user(iov->iov_base, kdata, copy)) + goto out; + kdata+=copy; + len-=copy; + iov->iov_len-=copy; + iov->iov_base+=copy; + } + iov++; + } + err = 0; +out: + return err; +} + +/* + * In kernel copy to iovec. Returns -EFAULT on error. + * + * Note: this modifies the original iovec. + */ + +void memcpy_tokerneliovec(struct iovec *iov, unsigned char *kdata, int len) +{ + while(len>0) + { + if(iov->iov_len) + { + int copy = min(iov->iov_len, len); + memcpy(iov->iov_base, kdata, copy); + kdata+=copy; + len-=copy; + iov->iov_len-=copy; + iov->iov_base+=copy; + } + iov++; + } +} + + +/* + * Copy iovec to kernel. Returns -EFAULT on error. + * + * Note: this modifies the original iovec. + */ + +int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) +{ + int err = -EFAULT; + + while(len>0) + { + if(iov->iov_len) + { + int copy = min(len, iov->iov_len); + if (copy_from_user(kdata, iov->iov_base, copy)) + goto out; + len-=copy; + kdata+=copy; + iov->iov_base+=copy; + iov->iov_len-=copy; + } + iov++; + } + err = 0; +out: + return err; +} + + +/* + * For use with ip_build_xmit + */ + +int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, + int len) +{ + int err = -EFAULT; + + /* Skip over the finished iovecs */ + while(offset >= iov->iov_len) + { + offset -= iov->iov_len; + iov++; + } + + while (len > 0) + { + u8 *base = iov->iov_base + offset; + int copy = min(len, iov->iov_len - offset); + + offset = 0; + if (copy_from_user(kdata, base, copy)) + goto out; + len -= copy; + kdata += copy; + iov++; + } + err = 0; +out: + return err; +} + +/* + * And now for the all-in-one: copy and checksum from a user iovec + * directly to a datagram + * Calls to csum_partial but the last must be in 32 bit chunks + * + * ip_build_xmit must ensure that when fragmenting only the last + * call to this function will be unaligned also. + */ + +int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, + int offset, unsigned int len, int *csump) +{ + int csum = *csump; + int partial_cnt = 0, err = 0; + + /* Skip over the finished iovecs */ + while (offset >= iov->iov_len) + { + offset -= iov->iov_len; + iov++; + } + + while (len > 0) + { + u8 *base = iov->iov_base + offset; + unsigned int copy = min(len, iov->iov_len - offset); + + offset = 0; + /* There is a remnant from previous iov. */ + if (partial_cnt) + { + int par_len = 4 - partial_cnt; + + /* iov component is too short ... 
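A standalone model of the walk memcpy_fromiovecend() performs: skip the segments wholly covered by the starting offset, then copy from each remaining segment in turn. Plain memcpy stands in for copy_from_user, and the function name is made up for the sketch:

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Copy len bytes out of an iovec, starting offset bytes in.  Unlike
 * memcpy_fromiovec(), this variant does not consume the iovec. */
static void copy_from_iovec_end(unsigned char *kdata, const struct iovec *iov,
				int offset, int len)
{
	while (offset >= (int)iov->iov_len) {	/* skip finished segments */
		offset -= iov->iov_len;
		iov++;
	}
	while (len > 0) {
		unsigned char *base = (unsigned char *)iov->iov_base + offset;
		int copy = iov->iov_len - offset;

		if (copy > len)
			copy = len;
		memcpy(kdata, base, copy);	/* kernel code uses copy_from_user() */
		offset = 0;
		len -= copy;
		kdata += copy;
		iov++;
	}
}

int main(void)
{
	struct iovec iov[2] = { { "hello ", 6 }, { "world", 5 } };
	unsigned char out[9];

	copy_from_iovec_end(out, iov, 2, 8);	/* gathers "llo worl" */
	printf("%.8s\n", out);
	return 0;
}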
*/ + if (par_len > copy) { + if (copy_from_user(kdata, base, copy)) + goto out_fault; + kdata += copy; + base += copy; + partial_cnt += copy; + len -= copy; + iov++; + if (len) + continue; + *csump = csum_partial(kdata - partial_cnt, + partial_cnt, csum); + goto out; + } + if (copy_from_user(kdata, base, par_len)) + goto out_fault; + csum = csum_partial(kdata - partial_cnt, 4, csum); + kdata += par_len; + base += par_len; + copy -= par_len; + len -= par_len; + partial_cnt = 0; + } + + if (len > copy) + { + partial_cnt = copy % 4; + if (partial_cnt) + { + copy -= partial_cnt; + if (copy_from_user(kdata + copy, base + copy, + partial_cnt)) + goto out_fault; + } + } + + if (copy) { + csum = csum_and_copy_from_user(base, kdata, copy, + csum, &err); + if (err) + goto out; + } + len -= copy + partial_cnt; + kdata += copy + partial_cnt; + iov++; + } + *csump = csum; +out: + return err; + +out_fault: + err = -EFAULT; + goto out; +} diff --git a/pfinet/linux-src/net/core/neighbour.c b/pfinet/linux-src/net/core/neighbour.c new file mode 100644 index 00000000..6afbfdcc --- /dev/null +++ b/pfinet/linux-src/net/core/neighbour.c @@ -0,0 +1,1394 @@ +/* + * Generic address resolution entity + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/socket.h> +#include <linux/sched.h> +#include <linux/netdevice.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif +#include <net/neighbour.h> +#include <net/dst.h> +#include <net/sock.h> +#include <linux/rtnetlink.h> + +/* + NOTE. The most unpleasent question is serialization of + accesses to resolved addresses. The problem is that addresses + are modified by bh, but they are referenced from normal + kernel thread. Before today no locking was made. + My reasoning was that corrupted address token will be copied + to packet with cosmologically small probability + (it is even difficult to estimate such small number) + and it is very silly to waste cycles in fast path to lock them. + + But now I changed my mind, but not because previous statement + is wrong. Actually, neigh->ha MAY BE not opaque byte array, + but reference to some private data. In this case even neglibible + corruption probability becomes bug. + + - hh cache is protected by rwlock. It assumes that + hh cache update procedure is short and fast, and that + read_lock is cheaper than start_bh_atomic(). + - ha tokens, saved in neighbour entries, are protected + by bh_atomic(). + - no protection is made in /proc reading. It is OK, because + /proc is broken by design in any case, and + corrupted output is normal behaviour there. + + --ANK (981025) + */ + +#define NEIGH_DEBUG 1 + +#define NEIGH_PRINTK(x...) printk(x) +#define NEIGH_NOPRINTK(x...) 
do { ; } while(0) +#define NEIGH_PRINTK0 NEIGH_PRINTK +#define NEIGH_PRINTK1 NEIGH_NOPRINTK +#define NEIGH_PRINTK2 NEIGH_NOPRINTK + +#if NEIGH_DEBUG >= 1 +#undef NEIGH_PRINTK1 +#define NEIGH_PRINTK1 NEIGH_PRINTK +#endif +#if NEIGH_DEBUG >= 2 +#undef NEIGH_PRINTK2 +#define NEIGH_PRINTK2 NEIGH_PRINTK +#endif + +static void neigh_timer_handler(unsigned long arg); +#ifdef CONFIG_ARPD +static void neigh_app_notify(struct neighbour *n); +#endif +static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev); + +static int neigh_glbl_allocs; +static struct neigh_table *neigh_tables; + +static int neigh_blackhole(struct sk_buff *skb) +{ + kfree_skb(skb); + return -ENETDOWN; +} + +/* + * It is random distribution in the interval (1/2)*base...(3/2)*base. + * It corresponds to default IPv6 settings and is not overridable, + * because it is really reasonbale choice. + */ + +unsigned long neigh_rand_reach_time(unsigned long base) +{ + return (net_random() % base) + (base>>1); +} + + +static int neigh_forced_gc(struct neigh_table *tbl) +{ + int shrunk = 0; + int i; + + if (atomic_read(&tbl->lock)) + return 0; + + for (i=0; i<=NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + /* Neighbour record may be discarded if: + - nobody refers to it. + - it is not premanent + - (NEW and probably wrong) + INCOMPLETE entries are kept at least for + n->parms->retrans_time, otherwise we could + flood network with resolution requests. + It is not clear, what is better table overflow + or flooding. + */ + if (atomic_read(&n->refcnt) == 0 && + !(n->nud_state&NUD_PERMANENT) && + (n->nud_state != NUD_INCOMPLETE || + jiffies - n->used > n->parms->retrans_time)) { + *np = n->next; + n->tbl = NULL; + tbl->entries--; + shrunk = 1; + neigh_destroy(n); + continue; + } + np = &n->next; + } + } + + tbl->last_flush = jiffies; + return shrunk; +} + +int neigh_ifdown(struct neigh_table *tbl, struct device *dev) +{ + int i; + + if (atomic_read(&tbl->lock)) { + NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n"); + return -EBUSY; + } + + start_bh_atomic(); + for (i=0; i<=NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + if (dev && n->dev != dev) { + np = &n->next; + continue; + } + *np = n->next; + n->tbl = NULL; + tbl->entries--; + if (atomic_read(&n->refcnt)) { + /* The most unpleasant situation. + We must destroy neighbour entry, + but someone still uses it. + + The destroy will be delayed until + the last user releases us, but + we must kill timers etc. and move + it to safe state. 
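A standalone model of neigh_rand_reach_time(): the result is uniform in [base/2, 3*base/2), so a 30 tick base gives an effective ReachableTime between 15 and 44 ticks. rand() stands in for the kernel's net_random():

#include <stdio.h>
#include <stdlib.h>

static unsigned long reach_time(unsigned long base)
{
	return ((unsigned long)rand() % base) + (base >> 1);
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		printf("%lu\n", reach_time(30));	/* always in [15, 44] */
	return 0;
}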
+ */ + if (n->nud_state & NUD_IN_TIMER) + del_timer(&n->timer); + n->parms = &tbl->parms; + skb_queue_purge(&n->arp_queue); + n->output = neigh_blackhole; + if (n->nud_state&NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + NEIGH_PRINTK2("neigh %p is stray.\n", n); + } else + neigh_destroy(n); + } + } + + del_timer(&tbl->proxy_timer); + skb_queue_purge(&tbl->proxy_queue); + pneigh_ifdown(tbl, dev); + end_bh_atomic(); + return 0; +} + +static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat) +{ + struct neighbour *n; + unsigned long now = jiffies; + + if (tbl->entries > tbl->gc_thresh1) { + if (creat < 0) + return NULL; + if (tbl->entries > tbl->gc_thresh3 || + (tbl->entries > tbl->gc_thresh2 && + now - tbl->last_flush > 5*HZ)) { + if (neigh_forced_gc(tbl) == 0 && + tbl->entries > tbl->gc_thresh3) + return NULL; + } + } + + n = kmalloc(tbl->entry_size, GFP_ATOMIC); + if (n == NULL) + return NULL; + + memset(n, 0, tbl->entry_size); + + skb_queue_head_init(&n->arp_queue); + n->updated = n->used = now; + n->nud_state = NUD_NONE; + n->output = neigh_blackhole; + n->parms = &tbl->parms; + init_timer(&n->timer); + n->timer.function = neigh_timer_handler; + n->timer.data = (unsigned long)n; + tbl->stats.allocs++; + neigh_glbl_allocs++; + return n; +} + + +struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, + struct device *dev, int creat) +{ + struct neighbour *n; + u32 hash_val; + int key_len = tbl->key_len; + + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>3; + hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (dev == n->dev && + memcmp(n->primary_key, pkey, key_len) == 0) { + atomic_inc(&n->refcnt); + return n; + } + } + if (!creat) + return NULL; + + n = neigh_alloc(tbl, creat); + if (n == NULL) + return NULL; + + memcpy(n->primary_key, pkey, key_len); + n->dev = dev; + + /* Protocol specific setup. */ + if (tbl->constructor && tbl->constructor(n) < 0) { + neigh_destroy(n); + return NULL; + } + + /* Device specific setup. 
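A standalone model of the bucket selection in __neigh_lookup(): the last four bytes of the protocol key (for IPv4, the address itself) are folded down and mixed with the interface index. HASHMASK is a placeholder for NEIGH_HASHMASK, whose real value lives in <net/neighbour.h>:

#include <stdio.h>
#include <string.h>

#define HASHMASK 0x1f	/* placeholder for NEIGH_HASHMASK */

static unsigned int neigh_bucket(const unsigned char *key, int key_len, int ifindex)
{
	unsigned int h;

	memcpy(&h, key + key_len - 4, 4);	/* last 32 bits of the key */
	h ^= h >> 16;
	h ^= h >> 8;
	h ^= h >> 3;
	return (h ^ (unsigned int)ifindex) & HASHMASK;
}

int main(void)
{
	unsigned char ip[4] = { 192, 168, 1, 1 };

	printf("bucket %u\n", neigh_bucket(ip, sizeof(ip), 2));
	return 0;
}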
*/ + if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) { + neigh_destroy(n); + return NULL; + } + + n->confirmed = jiffies - (n->parms->base_reachable_time<<1); + atomic_set(&n->refcnt, 1); + tbl->entries++; + n->next = tbl->hash_buckets[hash_val]; + tbl->hash_buckets[hash_val] = n; + n->tbl = tbl; + NEIGH_PRINTK2("neigh %p is created.\n", n); + return n; +} + +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, + struct device *dev, int creat) +{ + struct pneigh_entry *n; + u32 hash_val; + int key_len = tbl->key_len; + + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>4; + hash_val &= PNEIGH_HASHMASK; + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (memcmp(n->key, pkey, key_len) == 0 && + (n->dev == dev || !n->dev)) + return n; + } + if (!creat) + return NULL; + + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + if (n == NULL) + return NULL; + + memcpy(n->key, pkey, key_len); + n->dev = dev; + + if (tbl->pconstructor && tbl->pconstructor(n)) { + kfree(n); + return NULL; + } + + n->next = tbl->phash_buckets[hash_val]; + tbl->phash_buckets[hash_val] = n; + return n; +} + + +int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev) +{ + struct pneigh_entry *n, **np; + u32 hash_val; + int key_len = tbl->key_len; + + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>4; + hash_val &= PNEIGH_HASHMASK; + + for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) { + if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) { + *np = n->next; + synchronize_bh(); + if (tbl->pdestructor) + tbl->pdestructor(n); + kfree(n); + return 0; + } + } + return -ENOENT; +} + +static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev) +{ + struct pneigh_entry *n, **np; + u32 h; + + for (h=0; h<=PNEIGH_HASHMASK; h++) { + np = &tbl->phash_buckets[h]; + while ((n=*np) != NULL) { + if (n->dev == dev || dev == NULL) { + *np = n->next; + synchronize_bh(); + if (tbl->pdestructor) + tbl->pdestructor(n); + kfree(n); + continue; + } + np = &n->next; + } + } + return -ENOENT; +} + + +/* + * neighbour must already be out of the table; + * + */ +void neigh_destroy(struct neighbour *neigh) +{ + struct hh_cache *hh; + + if (neigh->tbl || atomic_read(&neigh->refcnt)) { + NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: " + "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0)); + return; + } + + if (neigh->nud_state&NUD_IN_TIMER) + del_timer(&neigh->timer); + + while ((hh = neigh->hh) != NULL) { + neigh->hh = hh->hh_next; + hh->hh_next = NULL; + hh->hh_output = neigh_blackhole; + if (atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); + } + + if (neigh->ops && neigh->ops->destructor) + (neigh->ops->destructor)(neigh); + + skb_queue_purge(&neigh->arp_queue); + + NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + + neigh_glbl_allocs--; + kfree(neigh); +} + +/* Neighbour state is suspicious; + disable fast path. + */ +static void neigh_suspect(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh); + + neigh->output = neigh->ops->output; + + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->output; +} + +/* Neighbour state is OK; + enable fast path. 
+ */ +static void neigh_connect(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + + neigh->output = neigh->ops->connected_output; + + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->hh_output; +} + +/* + Transitions NUD_STALE <-> NUD_REACHABLE do not occur + when fast path is built: we have no timers assotiated with + these states, we do not have time to check state when sending. + neigh_periodic_timer check periodically neigh->confirmed + time and moves NUD_REACHABLE -> NUD_STALE. + + If a routine wants to know TRUE entry state, it calls + neigh_sync before checking state. + */ + +static void neigh_sync(struct neighbour *n) +{ + unsigned long now = jiffies; + u8 state = n->nud_state; + + if (state&(NUD_NOARP|NUD_PERMANENT)) + return; + if (state&NUD_REACHABLE) { + if (now - n->confirmed > n->parms->reachable_time) { + n->nud_state = NUD_STALE; + neigh_suspect(n); + } + } else if (state&NUD_VALID) { + if (now - n->confirmed < n->parms->reachable_time) { + if (state&NUD_IN_TIMER) + del_timer(&n->timer); + n->nud_state = NUD_REACHABLE; + neigh_connect(n); + } + } +} + +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table*)arg; + unsigned long now = jiffies; + int i; + + if (atomic_read(&tbl->lock)) { + tbl->gc_timer.expires = now + 1*HZ; + add_timer(&tbl->gc_timer); + return; + } + + /* + * periodicly recompute ReachableTime from random function + */ + + if (now - tbl->last_rand > 300*HZ) { + struct neigh_parms *p; + tbl->last_rand = now; + for (p=&tbl->parms; p; p = p->next) + p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + } + + for (i=0; i <= NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + unsigned state = n->nud_state; + + if (state&(NUD_PERMANENT|NUD_IN_TIMER)) + goto next_elt; + + if ((long)(n->used - n->confirmed) < 0) + n->used = n->confirmed; + + if (atomic_read(&n->refcnt) == 0 && + (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) { + *np = n->next; + n->tbl = NULL; + n->next = NULL; + tbl->entries--; + neigh_destroy(n); + continue; + } + + if (n->nud_state&NUD_REACHABLE && + now - n->confirmed > n->parms->reachable_time) { + n->nud_state = NUD_STALE; + neigh_suspect(n); + } + +next_elt: + np = &n->next; + } + } + + tbl->gc_timer.expires = now + tbl->gc_interval; + add_timer(&tbl->gc_timer); +} + +static __inline__ int neigh_max_probes(struct neighbour *n) +{ + struct neigh_parms *p = n->parms; + return p->ucast_probes + p->app_probes + p->mcast_probes; +} + + +/* Called when a timer expires for a neighbour entry. */ + +static void neigh_timer_handler(unsigned long arg) +{ + unsigned long now = jiffies; + struct neighbour *neigh = (struct neighbour*)arg; + unsigned state = neigh->nud_state; + + if (!(state&NUD_IN_TIMER)) { + NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n"); + return; + } + + if ((state&NUD_VALID) && + now - neigh->confirmed < neigh->parms->reachable_time) { + neigh->nud_state = NUD_REACHABLE; + NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_connect(neigh); + return; + } + if (state == NUD_DELAY) { + NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh->nud_state = NUD_PROBE; + neigh->probes = 0; + } + + if (neigh->probes >= neigh_max_probes(neigh)) { + struct sk_buff *skb; + + neigh->nud_state = NUD_FAILED; + neigh->tbl->stats.res_failed++; + NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + + /* It is very thin place. 
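A reduced, user-space model of the neigh_sync() decision above: whether an entry stays REACHABLE depends only on how long ago it was last confirmed relative to reachable_time. The state names mirror the kernel's, but the values here are arbitrary:

#include <stdio.h>

enum { NUD_STALE, NUD_REACHABLE };

static int sync_state(int state, unsigned long now,
		      unsigned long confirmed, unsigned long reachable_time)
{
	if (state == NUD_REACHABLE && now - confirmed > reachable_time)
		return NUD_STALE;	/* confirmation too old: suspect the entry */
	if (state == NUD_STALE && now - confirmed < reachable_time)
		return NUD_REACHABLE;	/* fresh confirmation: reconnect fast path */
	return state;
}

int main(void)
{
	printf("%d\n", sync_state(NUD_REACHABLE, 1000, 100, 300));	/* -> NUD_STALE */
	printf("%d\n", sync_state(NUD_STALE,     1000, 900, 300));	/* -> NUD_REACHABLE */
	return 0;
}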
report_unreachable is very complicated + routine. Particularly, it can hit the same neighbour entry! + + So that, we try to be accurate and avoid dead loop. --ANK + */ + while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) + neigh->ops->error_report(neigh, skb); + skb_queue_purge(&neigh->arp_queue); + return; + } + + neigh->timer.expires = now + neigh->parms->retrans_time; + add_timer(&neigh->timer); + + neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); + neigh->probes++; +} + +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + start_bh_atomic(); + if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) { + if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) { + if (neigh->tbl == NULL) { + NEIGH_PRINTK2("neigh %p used after death.\n", neigh); + if (skb) + kfree_skb(skb); + end_bh_atomic(); + return 1; + } + if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + neigh->probes = neigh->parms->ucast_probes; + neigh->nud_state = NUD_INCOMPLETE; + neigh->timer.expires = jiffies + neigh->parms->retrans_time; + add_timer(&neigh->timer); + + neigh->ops->solicit(neigh, skb); + neigh->probes++; + } else { + neigh->nud_state = NUD_FAILED; + if (skb) + kfree_skb(skb); + end_bh_atomic(); + return 1; + } + } + if (neigh->nud_state == NUD_INCOMPLETE) { + if (skb) { + if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { + struct sk_buff *buff; + buff = neigh->arp_queue.prev; + __skb_unlink(buff, &neigh->arp_queue); + kfree_skb(buff); + } + __skb_queue_head(&neigh->arp_queue, skb); + } + end_bh_atomic(); + return 1; + } + if (neigh->nud_state == NUD_STALE) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->nud_state = NUD_DELAY; + neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; + add_timer(&neigh->timer); + } + } + end_bh_atomic(); + return 0; +} + +static __inline__ void neigh_update_hhs(struct neighbour *neigh) +{ + struct hh_cache *hh; + void (*update)(struct hh_cache*, struct device*, unsigned char*) = + neigh->dev->header_cache_update; + + if (update) { + for (hh=neigh->hh; hh; hh=hh->hh_next) { + write_lock_irq(&hh->hh_lock); + update(hh, neigh->dev, neigh->ha); + write_unlock_irq(&hh->hh_lock); + } + } +} + + + +/* Generic update routine. + -- lladdr is new lladdr or NULL, if it is not supplied. + -- new is new state. + -- override==1 allows to override existing lladdr, if it is different. + -- arp==0 means that the change is administrative. + */ + +int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp) +{ + u8 old = neigh->nud_state; + struct device *dev = neigh->dev; + + if (arp && (old&(NUD_NOARP|NUD_PERMANENT))) + return -EPERM; + + if (!(new&NUD_VALID)) { + if (old&NUD_IN_TIMER) + del_timer(&neigh->timer); + if (old&NUD_CONNECTED) + neigh_suspect(neigh); + neigh->nud_state = new; + return 0; + } + + /* Compare new lladdr with cached one */ + if (dev->addr_len == 0) { + /* First case: device needs no address. */ + lladdr = neigh->ha; + } else if (lladdr) { + /* The second case: if something is already cached + and a new address is proposed: + - compare new & old + - if they are different, check override flag + */ + if (old&NUD_VALID) { + if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0) + lladdr = neigh->ha; + else if (!override) + return -EPERM; + } + } else { + /* No address is supplied; if we know something, + use it, otherwise discard the request. 
+ */ + if (!(old&NUD_VALID)) + return -EINVAL; + lladdr = neigh->ha; + } + + neigh_sync(neigh); + old = neigh->nud_state; + if (new&NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + + /* If entry was valid and address is not changed, + do not change entry state, if new one is STALE. + */ + if (old&NUD_VALID) { + if (lladdr == neigh->ha) + if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED))) + return 0; + } + if (old&NUD_IN_TIMER) + del_timer(&neigh->timer); + neigh->nud_state = new; + if (lladdr != neigh->ha) { + memcpy(&neigh->ha, lladdr, dev->addr_len); + neigh_update_hhs(neigh); + neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1); +#ifdef CONFIG_ARPD + if (neigh->parms->app_probes) + neigh_app_notify(neigh); +#endif + } + if (new == old) + return 0; + if (new&NUD_CONNECTED) + neigh_connect(neigh); + else + neigh_suspect(neigh); + if (!(old&NUD_VALID)) { + struct sk_buff *skb; + + /* Again: avoid dead loop if something went wrong */ + + while (neigh->nud_state&NUD_VALID && + (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) { + struct neighbour *n1 = neigh; + /* On shaper/eql skb->dst->neighbour != neigh :( */ + if (skb->dst && skb->dst->neighbour) + n1 = skb->dst->neighbour; + n1->output(skb); + } + skb_queue_purge(&neigh->arp_queue); + } + return 0; +} + +struct neighbour * neigh_event_ns(struct neigh_table *tbl, + u8 *lladdr, void *saddr, + struct device *dev) +{ + struct neighbour *neigh; + + neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); + if (neigh) + neigh_update(neigh, lladdr, NUD_STALE, 1, 1); + return neigh; +} + +static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol) +{ + struct hh_cache *hh = NULL; + struct device *dev = dst->dev; + + for (hh=n->hh; hh; hh = hh->hh_next) + if (hh->hh_type == protocol) + break; + + if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { + memset(hh, 0, sizeof(struct hh_cache)); + hh->hh_type = protocol; + atomic_set(&hh->hh_refcnt, 0); + hh->hh_next = NULL; + if (dev->hard_header_cache(n, hh)) { + kfree(hh); + hh = NULL; + } else { + atomic_inc(&hh->hh_refcnt); + hh->hh_next = n->hh; + n->hh = hh; + if (n->nud_state&NUD_CONNECTED) + hh->hh_output = n->ops->hh_output; + else + hh->hh_output = n->ops->output; + } + } + if (hh) { + atomic_inc(&hh->hh_refcnt); + dst->hh = hh; + } +} + +/* This function can be used in contexts, where only old dev_queue_xmit + worked, f.e. if you want to override normal output path (eql, shaper), + but resoltution is not made yet. + */ + +int neigh_compat_output(struct sk_buff *skb) +{ + struct device *dev = skb->dev; + + __skb_pull(skb, skb->nh.raw - skb->data); + + if (dev->hard_header && + dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && + dev->rebuild_header(skb)) + return 0; + + return dev_queue_xmit(skb); +} + +/* Slow and careful. 
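A hedged sketch of how a resolution protocol (ARP-like) might feed the table through the two entry points just defined; the function and variable names are illustrative, not taken from this diff:

/* Hypothetical protocol-side caller: a received reply both refreshes the
 * sender's entry (neigh_event_ns creates it as STALE if needed) and, when
 * it answers our own probe, is promoted to REACHABLE via neigh_update()
 * with override=1, arp=1.
 */
static void example_receive_reply(struct neigh_table *tbl, struct device *dev,
				  u32 sender_ip, u8 *sender_mac)
{
	struct neighbour *n;

	n = neigh_event_ns(tbl, sender_mac, &sender_ip, dev);
	if (n == NULL)
		return;
	neigh_update(n, sender_mac, NUD_REACHABLE, 1, 1);
	neigh_release(n);
}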
*/ + +int neigh_resolve_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct neighbour *neigh; + + if (!dst || !(neigh = dst->neighbour)) + goto discard; + + __skb_pull(skb, skb->nh.raw - skb->data); + + if (neigh_event_send(neigh, skb) == 0) { + int err; + struct device *dev = neigh->dev; + if (dev->hard_header_cache && dst->hh == NULL) { + start_bh_atomic(); + if (dst->hh == NULL) + neigh_hh_init(neigh, dst, dst->ops->protocol); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); + end_bh_atomic(); + } else { + start_bh_atomic(); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); + end_bh_atomic(); + } + if (err >= 0) + return neigh->ops->queue_xmit(skb); + kfree_skb(skb); + return -EINVAL; + } + return 0; + +discard: + NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL); + kfree_skb(skb); + return -EINVAL; +} + +/* As fast as possible without hh cache */ + +int neigh_connected_output(struct sk_buff *skb) +{ + int err; + struct dst_entry *dst = skb->dst; + struct neighbour *neigh = dst->neighbour; + struct device *dev = neigh->dev; + + __skb_pull(skb, skb->nh.raw - skb->data); + + start_bh_atomic(); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); + end_bh_atomic(); + if (err >= 0) + return neigh->ops->queue_xmit(skb); + kfree_skb(skb); + return -EINVAL; +} + +static void neigh_proxy_process(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table *)arg; + long sched_next = 0; + unsigned long now = jiffies; + struct sk_buff *skb = tbl->proxy_queue.next; + + while (skb != (struct sk_buff*)&tbl->proxy_queue) { + struct sk_buff *back = skb; + long tdif = back->stamp.tv_usec - now; + + skb = skb->next; + if (tdif <= 0) { + __skb_unlink(back, &tbl->proxy_queue); + if (tbl->proxy_redo) + tbl->proxy_redo(back); + else + kfree_skb(back); + } else if (!sched_next || tdif < sched_next) + sched_next = tdif; + } + del_timer(&tbl->proxy_timer); + if (sched_next) { + tbl->proxy_timer.expires = jiffies + sched_next; + add_timer(&tbl->proxy_timer); + } +} + +void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, + struct sk_buff *skb) +{ + unsigned long now = jiffies; + long sched_next = net_random()%p->proxy_delay; + + if (tbl->proxy_queue.qlen > p->proxy_qlen) { + kfree_skb(skb); + return; + } + skb->stamp.tv_sec = 0; + skb->stamp.tv_usec = now + sched_next; + if (del_timer(&tbl->proxy_timer)) { + long tval = tbl->proxy_timer.expires - now; + if (tval < sched_next) + sched_next = tval; + } + tbl->proxy_timer.expires = now + sched_next; + dst_release(skb->dst); + skb->dst = NULL; + __skb_queue_tail(&tbl->proxy_queue, skb); + add_timer(&tbl->proxy_timer); +} + + +struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl) +{ + struct neigh_parms *p; + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) { + memcpy(p, &tbl->parms, sizeof(*p)); + p->tbl = tbl; + p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + if (dev && dev->neigh_setup) { + if (dev->neigh_setup(dev, p)) { + kfree(p); + return NULL; + } + } + p->next = tbl->parms.next; + tbl->parms.next = p; + } + return p; +} + +void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) +{ + struct neigh_parms **p; + + if (parms == NULL || parms == &tbl->parms) + return; + for (p = &tbl->parms.next; *p; p = &(*p)->next) { + if (*p == parms) { + *p = parms->next; + synchronize_bh(); +#ifdef CONFIG_SYSCTL + 
neigh_sysctl_unregister(parms); +#endif + kfree(parms); + return; + } + } + NEIGH_PRINTK1("neigh_release_parms: not found\n"); +} + + +void neigh_table_init(struct neigh_table *tbl) +{ + unsigned long now = jiffies; + + tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); + + init_timer(&tbl->gc_timer); + tbl->gc_timer.data = (unsigned long)tbl; + tbl->gc_timer.function = neigh_periodic_timer; + tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time; + add_timer(&tbl->gc_timer); + + init_timer(&tbl->proxy_timer); + tbl->proxy_timer.data = (unsigned long)tbl; + tbl->proxy_timer.function = neigh_proxy_process; + skb_queue_head_init(&tbl->proxy_queue); + + tbl->last_flush = now; + tbl->last_rand = now + tbl->parms.reachable_time*20; + tbl->next = neigh_tables; + neigh_tables = tbl; +} + +int neigh_table_clear(struct neigh_table *tbl) +{ + struct neigh_table **tp; + + start_bh_atomic(); + del_timer(&tbl->gc_timer); + del_timer(&tbl->proxy_timer); + skb_queue_purge(&tbl->proxy_queue); + neigh_ifdown(tbl, NULL); + end_bh_atomic(); + if (tbl->entries) + printk(KERN_CRIT "neighbour leakage\n"); + for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { + if (*tp == tbl) { + *tp = tbl->next; + synchronize_bh(); + break; + } + } +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(&tbl->parms); +#endif + return 0; +} + +#ifdef CONFIG_RTNETLINK + + +int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; + struct neigh_table *tbl; + struct device *dev = NULL; + + if (ndm->ndm_ifindex) { + if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + return -ENODEV; + } + + for (tbl=neigh_tables; tbl; tbl = tbl->next) { + int err = 0; + struct neighbour *n; + + if (tbl->family != ndm->ndm_family) + continue; + + if (nda[NDA_DST-1] == NULL || + nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) + return -EINVAL; + + if (ndm->ndm_flags&NTF_PROXY) + return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + + if (dev == NULL) + return -EINVAL; + + start_bh_atomic(); + n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0); + if (n) { + err = neigh_update(n, NULL, NUD_FAILED, 1, 0); + neigh_release(n); + } + end_bh_atomic(); + return err; + } + + return -EADDRNOTAVAIL; +} + +int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; + struct neigh_table *tbl; + struct device *dev = NULL; + + if (ndm->ndm_ifindex) { + if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + return -ENODEV; + } + + for (tbl=neigh_tables; tbl; tbl = tbl->next) { + int err = 0; + struct neighbour *n; + + if (tbl->family != ndm->ndm_family) + continue; + if (nda[NDA_DST-1] == NULL || + nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) + return -EINVAL; + if (ndm->ndm_flags&NTF_PROXY) { + if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1)) + return 0; + return -ENOBUFS; + } + if (dev == NULL) + return -EINVAL; + if (nda[NDA_LLADDR-1] != NULL && + nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len)) + return -EINVAL; + start_bh_atomic(); + n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0); + if (n) { + if (nlh->nlmsg_flags&NLM_F_EXCL) + err = -EEXIST; + } else if (!(nlh->nlmsg_flags&NLM_F_CREATE)) + err = -ENOENT; + else { + n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1); + if (n == NULL) + err = -ENOBUFS; + } + if (err == 0) { + err = neigh_update(n, nda[NDA_LLADDR-1] ? 
RTA_DATA(nda[NDA_LLADDR-1]) : NULL, + ndm->ndm_state, + nlh->nlmsg_flags&NLM_F_REPLACE, 0); + } + if (n) + neigh_release(n); + end_bh_atomic(); + return err; + } + + return -EADDRNOTAVAIL; +} + + +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, + u32 pid, u32 seq, int event) +{ + unsigned long now = jiffies; + struct ndmsg *ndm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + struct nda_cacheinfo ci; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm)); + ndm = NLMSG_DATA(nlh); + ndm->ndm_family = n->ops->family; + ndm->ndm_flags = n->flags; + ndm->ndm_type = n->type; + ndm->ndm_state = n->nud_state; + ndm->ndm_ifindex = n->dev->ifindex; + RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); + if (n->nud_state&NUD_VALID) + RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); + ci.ndm_used = now - n->used; + ci.ndm_confirmed = now - n->confirmed; + ci.ndm_updated = now - n->updated; + ci.ndm_refcnt = atomic_read(&n->refcnt); + RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + + +static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) +{ + struct neighbour *n; + int h, s_h; + int idx, s_idx; + + s_h = cb->args[1]; + s_idx = idx = cb->args[2]; + for (h=0; h <= NEIGH_HASHMASK; h++) { + if (h < s_h) continue; + if (h > s_h) + s_idx = 0; + start_bh_atomic(); + for (n = tbl->hash_buckets[h], idx = 0; n; + n = n->next, idx++) { + if (idx < s_idx) + continue; + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) { + end_bh_atomic(); + cb->args[1] = h; + cb->args[2] = idx; + return -1; + } + } + end_bh_atomic(); + } + + cb->args[1] = h; + cb->args[2] = idx; + return skb->len; +} + +int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct neigh_table *tbl; + int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family; + + s_t = cb->args[0]; + + for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) { + if (t < s_t) continue; + if (family && tbl->family != family) + continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + if (neigh_dump_table(tbl, skb, cb) < 0) + break; + } + + cb->args[0] = t; + + return skb->len; +} + +#ifdef CONFIG_ARPD +void neigh_app_ns(struct neighbour *n) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + int size = NLMSG_SPACE(sizeof(struct ndmsg)+256); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return; + + if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) { + kfree_skb(skb); + return; + } + nlh = (struct nlmsghdr*)skb->data; + nlh->nlmsg_flags = NLM_F_REQUEST; + NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); +} + +static void neigh_app_notify(struct neighbour *n) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + int size = NLMSG_SPACE(sizeof(struct ndmsg)+256); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return; + + if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) { + kfree_skb(skb); + return; + } + nlh = (struct nlmsghdr*)skb->data; + NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); +} + + + +#endif + + +#endif + +#ifdef CONFIG_SYSCTL + +struct neigh_sysctl_table +{ + struct ctl_table_header *sysctl_header; + ctl_table neigh_vars[17]; + ctl_table neigh_dev[2]; + ctl_table neigh_neigh_dir[2]; + 
ctl_table neigh_proto_dir[2]; + ctl_table neigh_root_dir[2]; +} neigh_sysctl_template = { + NULL, + {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_APP_SOLICIT, "app_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_RETRANS_TIME, "retrans_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_REACHABLE_TIME, "base_reachable_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_GC_STALE_TIME, "gc_stale_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_UNRES_QLEN, "unres_qlen", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_PROXY_QLEN, "proxy_qlen", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_ANYCAST_DELAY, "anycast_delay", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_PROXY_DELAY, "proxy_delay", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_LOCKTIME, "locktime", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_INTERVAL, "gc_interval", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_GC_THRESH1, "gc_thresh1", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_THRESH2, "gc_thresh2", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_THRESH3, "gc_thresh3", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {0}}, + + {{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}}, + {{0, "neigh", NULL, 0, 0555, NULL},{0}}, + {{0, NULL, NULL, 0, 0555, NULL},{0}}, + {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}} +}; + +int neigh_sysctl_register(struct device *dev, struct neigh_parms *p, + int p_id, int pdev_id, char *p_name) +{ + struct neigh_sysctl_table *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t == NULL) + return -ENOBUFS; + memcpy(t, &neigh_sysctl_template, sizeof(*t)); + t->neigh_vars[0].data = &p->mcast_probes; + t->neigh_vars[1].data = &p->ucast_probes; + t->neigh_vars[2].data = &p->app_probes; + t->neigh_vars[3].data = &p->retrans_time; + t->neigh_vars[4].data = &p->base_reachable_time; + t->neigh_vars[5].data = &p->delay_probe_time; + t->neigh_vars[6].data = &p->gc_staletime; + t->neigh_vars[7].data = &p->queue_len; + t->neigh_vars[8].data = &p->proxy_qlen; + t->neigh_vars[9].data = &p->anycast_delay; + t->neigh_vars[10].data = &p->proxy_delay; + t->neigh_vars[11].data = &p->locktime; + if (dev) { + t->neigh_dev[0].procname = dev->name; + t->neigh_dev[0].ctl_name = dev->ifindex; + memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); + } else { + t->neigh_vars[12].data = (int*)(p+1); + t->neigh_vars[13].data = (int*)(p+1) + 1; + t->neigh_vars[14].data = (int*)(p+1) + 2; + t->neigh_vars[15].data = (int*)(p+1) + 3; + } + t->neigh_neigh_dir[0].ctl_name = pdev_id; + + t->neigh_proto_dir[0].procname = p_name; + t->neigh_proto_dir[0].ctl_name = p_id; + + t->neigh_dev[0].child = t->neigh_vars; + t->neigh_neigh_dir[0].child = t->neigh_dev; + t->neigh_proto_dir[0].child = t->neigh_neigh_dir; + t->neigh_root_dir[0].child = t->neigh_proto_dir; + + t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0); + if (t->sysctl_header == NULL) { + kfree(t); + return -ENOBUFS; + } + p->sysctl_table = t; + return 0; +} + +void neigh_sysctl_unregister(struct neigh_parms *p) +{ + if 
(p->sysctl_table) { + struct neigh_sysctl_table *t = p->sysctl_table; + p->sysctl_table = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t); + } +} + +#endif /* CONFIG_SYSCTL */ diff --git a/pfinet/linux-src/net/core/profile.c b/pfinet/linux-src/net/core/profile.c new file mode 100644 index 00000000..fc7464b7 --- /dev/null +++ b/pfinet/linux-src/net/core/profile.c @@ -0,0 +1,305 @@ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/ip.h> +#include <linux/inet.h> +#include <net/checksum.h> + +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <net/profile.h> + +#ifdef CONFIG_NET_PROFILE + +atomic_t net_profile_active; +struct timeval net_profile_adjust; + +NET_PROFILE_DEFINE(total); + +struct net_profile_slot *net_profile_chain = &net_prof_total; + +#ifdef __alpha__ +__u32 alpha_lo; +long alpha_hi; + +static void alpha_tick(unsigned long); + +static struct timer_list alpha_timer = + { NULL, NULL, 0, 0L, alpha_tick }; + +void alpha_tick(unsigned long dummy) +{ + struct timeval dummy_stamp; + net_profile_stamp(&dummy_stamp); + alpha_timer.expires = jiffies + 4*HZ; + add_timer(&alpha_timer); +} + +#endif + +void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved) +{ + struct net_profile_slot *s; + + net_profile_sub(entered, leaved); + for (s = net_profile_chain; s; s = s->next) { + if (s->active) + net_profile_add(leaved, &s->irq); + } +} + + +#ifdef CONFIG_PROC_FS +static int profile_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0; + off_t begin=0; + int len=0; + struct net_profile_slot *s; + + len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n"); + + if (offset == 0) { + cli(); + net_prof_total.active = 1; + atomic_inc(&net_profile_active); + NET_PROFILE_LEAVE(total); + sti(); + } + for (s = net_profile_chain; s; s = s->next) { + struct net_profile_slot tmp; + + cli(); + tmp = *s; + + /* Wrong, but pretty close to truth */ + + s->accumulator.tv_sec = 0; + s->accumulator.tv_usec = 0; + s->irq.tv_sec = 0; + s->irq.tv_usec = 0; + s->hits = 0; + s->underflow = 0; + /* Repair active count, it is possible, only if code has a bug */ + if (s->active) { + s->active = 0; + atomic_dec(&net_profile_active); + } + sti(); + + net_profile_sub(&tmp.irq, &tmp.accumulator); + + len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d", + tmp.id, + tmp.hits, + tmp.accumulator.tv_sec, + tmp.accumulator.tv_usec, + tmp.irq.tv_sec, + tmp.irq.tv_usec, + tmp.underflow, tmp.active); + + buffer[len++]='\n'; + + pos=begin+len; + if(pos<offset) { + len=0; + begin=pos; + } + if(pos>offset+length) + goto done; + } + *eof = 1; + +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if (len < 0) { + len = 0; + printk(KERN_CRIT "Yep, guys... 
our template for proc_*_read is crappy :-)\n"); + } + if (offset == 0) { + cli(); + net_prof_total.active = 0; + net_prof_total.hits = 0; + net_profile_stamp(&net_prof_total.entered); + sti(); + } + return len; +} +#endif + +struct iphdr whitehole_iph; +int whitehole_count; + +static int whitehole_xmit(struct sk_buff *skb, struct device *dev) +{ + struct net_device_stats *stats; + dev_kfree_skb(skb); + stats = (struct net_device_stats *)dev->priv; + stats->tx_packets++; + stats->tx_bytes+=skb->len; + + return 0; +} + +static void whitehole_inject(unsigned long); +int whitehole_init(struct device *dev); + +static struct timer_list whitehole_timer = + { NULL, NULL, 0, 0L, whitehole_inject }; + +static struct device whitehole_dev = { + "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, }; + +static int whitehole_open(struct device *dev) +{ + whitehole_count = 100000; + whitehole_timer.expires = jiffies + 5*HZ; + add_timer(&whitehole_timer); + return 0; +} + +static int whitehole_close(struct device *dev) +{ + del_timer(&whitehole_timer); + return 0; +} + +static void whitehole_inject(unsigned long dummy) +{ + struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv; + extern int netdev_dropping; + + do { + struct iphdr *iph; + struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); + if (!skb) + break; + skb_reserve(skb, 32); + iph = (struct iphdr*)skb_put(skb, sizeof(*iph)); + skb->mac.raw = ((u8*)iph) - 14; + memcpy(iph, &whitehole_iph, sizeof(*iph)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->dev = &whitehole_dev; + skb->pkt_type = PACKET_HOST; + stats->rx_packets++; + stats->rx_bytes += skb->len; + netif_rx(skb); + whitehole_count--; + } while (netdev_dropping == 0 && whitehole_count>0); + if (whitehole_count > 0) { + whitehole_timer.expires = jiffies + 1; + add_timer(&whitehole_timer); + } +} + +static struct net_device_stats *whitehole_get_stats(struct device *dev) +{ + struct net_device_stats *stats = (struct net_device_stats *) dev->priv; + return stats; +} + +__initfunc(int whitehole_init(struct device *dev)) +{ + dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + if (dev->priv == NULL) + return -ENOBUFS; + memset(dev->priv, 0, sizeof(struct net_device_stats)); + dev->get_stats = whitehole_get_stats; + dev->hard_start_xmit = whitehole_xmit; + dev->open = whitehole_open; + dev->stop = whitehole_close; + ether_setup(dev); + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST); + dev->iflink = 0; + whitehole_iph.ihl = 5; + whitehole_iph.version = 4; + whitehole_iph.ttl = 2; + whitehole_iph.saddr = in_aton("193.233.7.21"); + whitehole_iph.daddr = in_aton("193.233.7.10"); + whitehole_iph.tot_len = htons(20); + whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20); + return 0; +} + +int net_profile_register(struct net_profile_slot *slot) +{ + cli(); + slot->next = net_profile_chain; + net_profile_chain = slot; + sti(); + return 0; +} + +int net_profile_unregister(struct net_profile_slot *slot) +{ + struct net_profile_slot **sp, *s; + + for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) { + if (s == slot) { + cli(); + *sp = s->next; + sti(); + return 0; + } + } + return -ESRCH; +} + + +__initfunc(int net_profile_init(void)) +{ + int i; + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; + + ent = create_proc_entry("net/profile", 0, 0); + ent->read_proc = profile_read_proc; +#endif + + register_netdevice(&whitehole_dev); + + printk("Evaluating net profiler 
cost ..."); +#if CPU == 586 || CPU == 686 + if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC)) { + printk(KERN_ERR "Sorry, your CPU does not support TSC. Net profiler disabled.\n"); + return -1; + } +#endif + start_bh_atomic(); +#ifdef __alpha__ + alpha_tick(0); +#endif + for (i=0; i<1024; i++) { + NET_PROFILE_ENTER(total); + NET_PROFILE_LEAVE(total); + } + if (net_prof_total.accumulator.tv_sec) { + printk(" too high!\n"); + } else { + net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10; + printk("%ld units\n", net_profile_adjust.tv_usec); + } + net_prof_total.hits = 0; + net_profile_stamp(&net_prof_total.entered); + end_bh_atomic(); + return 0; +} + +#endif diff --git a/pfinet/linux-src/net/core/rtnetlink.c b/pfinet/linux-src/net/core/rtnetlink.c new file mode 100644 index 00000000..7f89e54a --- /dev/null +++ b/pfinet/linux-src/net/core/rtnetlink.c @@ -0,0 +1,512 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Routing netlink socket interface: protocol independent part. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Vitaly E. Lavrov RTA_OK arithmetics was wrong. + * Alexey Zhuravlev ifi_change does something useful + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/capability.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/string.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/arp.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +atomic_t rtnl_rlockct; +struct wait_queue *rtnl_wait; + + +void rtnl_lock() +{ + rtnl_shlock(); + rtnl_exlock(); +} + +void rtnl_unlock() +{ + rtnl_exunlock(); + rtnl_shunlock(); +} + +int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) +{ + memset(tb, 0, sizeof(struct rtattr*)*maxattr); + + while (RTA_OK(rta, len)) { + unsigned flavor = rta->rta_type; + if (flavor && flavor <= maxattr) + tb[flavor-1] = rta; + rta = RTA_NEXT(rta, len); + } + return 0; +} + +#ifdef CONFIG_RTNETLINK +struct sock *rtnl; + +unsigned long rtnl_wlockct; + +struct rtnetlink_link * rtnetlink_links[NPROTO]; + +#define _S 1 /* superuser privileges required */ +#define _X 2 /* exclusive access to tables required */ +#define _G 4 /* GET request */ + +static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = +{ + NLMSG_LENGTH(sizeof(struct ifinfomsg)), + NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + NLMSG_LENGTH(sizeof(struct rtmsg)), + NLMSG_LENGTH(sizeof(struct ndmsg)), + NLMSG_LENGTH(sizeof(struct rtmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)) +}; + 
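(Editor's sketch, not part of the diff.) The rtm_min table above and the rta_max table below are indexed per message-type group: rtnetlink types are allocated in blocks of four starting at RTM_BASE, and rtnetlink_rcv_msg() further down derives both the table row and the request kind from nlmsg_type with a shift and a mask. A minimal illustration of that lookup, reusing the names from the code that follows (the simplified error return and the comment values are illustrative only):

    int type   = nlh->nlmsg_type - RTM_BASE;  /* link, address, route, ... message */
    int sz_idx = type >> 2;                   /* row into rtm_min[] / rta_max[]     */
    int kind   = type & 3;                    /* 0 = NEW, 1 = DEL, 2 = GET (dump)   */

    if (nlh->nlmsg_len < rtm_min[sz_idx])     /* fixed header too short for this class */
            return -EINVAL;
    /* attribute types parsed after the fixed header are bounded by rta_max[sz_idx] */
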
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] = +{ + IFLA_MAX, + IFA_MAX, + RTA_MAX, + NDA_MAX, + RTA_MAX, + TCA_MAX, + TCA_MAX, + TCA_MAX +}; + +void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + struct rtattr *rta; + int size = RTA_LENGTH(attrlen); + + rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); + rta->rta_type = attrtype; + rta->rta_len = size; + memcpy(RTA_DATA(rta), data, attrlen); +} + +int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +{ + int err = 0; + + NETLINK_CB(skb).dst_groups = group; + if (echo) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); + if (echo) + err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + return err; +} + +static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, + int type, u32 pid, u32 seq, u32 change) +{ + struct ifinfomsg *r; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r)); + if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + r = NLMSG_DATA(nlh); + r->ifi_family = AF_UNSPEC; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev->flags; + r->ifi_change = change; + + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + if (dev->addr_len) { + RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); + } + if (1) { + unsigned mtu = dev->mtu; + RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); + } + if (dev->ifindex != dev->iflink) + RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); + if (dev->qdisc_sleeping) + RTA_PUT(skb, IFLA_QDISC, + strlen(dev->qdisc_sleeping->ops->id) + 1, + dev->qdisc_sleeping->ops->id); + if (dev->get_stats) { + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) + RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats); + } + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->args[0]; + struct device *dev; + + for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + if (idx < s_idx) + continue; + if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) + break; + } + cb->args[0] = idx; + + return skb->len; +} + +int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->family; + + if (s_idx == 0) + s_idx = 1; + for (idx=1; idx<NPROTO; idx++) { + int type = cb->nlh->nlmsg_type-RTM_BASE; + if (idx < s_idx || idx == PF_PACKET) + continue; + if (rtnetlink_links[idx] == NULL || + rtnetlink_links[idx][type].dumpit == NULL) + continue; + if (idx > s_idx) + memset(&cb->args[0], 0, sizeof(cb->args)); + if (rtnetlink_links[idx][type].dumpit(skb, cb) == 0) + continue; + if (skb_tailroom(skb) < 256) + break; + } + cb->family = idx; + + return skb->len; +} + +void rtmsg_ifinfo(int type, struct device *dev) +{ + struct sk_buff *skb; + int size = NLMSG_GOODSIZE; + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) { + kfree_skb(skb); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); +} + +static int rtnetlink_done(struct netlink_callback *cb) +{ + if (cap_raised(NETLINK_CB(cb->skb).eff_cap, CAP_NET_ADMIN) && cb->nlh->nlmsg_flags&NLM_F_ATOMIC) + 
rtnl_shunlock(); + return 0; +} + +/* Process one rtnetlink message. */ + +extern __inline__ int +rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) +{ + struct rtnetlink_link *link; + struct rtnetlink_link *link_tab; + struct rtattr *rta[RTATTR_MAX]; + + int exclusive = 0; + int sz_idx, kind; + int min_len; + int family; + int type; + int err; + + /* Only requests are handled by kernel now */ + if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) + return 0; + + type = nlh->nlmsg_type; + + /* A control message: ignore them */ + if (type < RTM_BASE) + return 0; + + /* Unknown message: reply with EINVAL */ + if (type > RTM_MAX) + goto err_inval; + + type -= RTM_BASE; + + /* All the messages must have at least 1 byte length */ + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) + return 0; + + family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; + if (family > NPROTO) { + *errp = -EAFNOSUPPORT; + return -1; + } + + link_tab = rtnetlink_links[family]; + if (link_tab == NULL) + link_tab = rtnetlink_links[PF_UNSPEC]; + link = &link_tab[type]; + + sz_idx = type>>2; + kind = type&3; + + if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { + *errp = -EPERM; + return -1; + } + + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + u32 rlen; + + if (link->dumpit == NULL) + link = &(rtnetlink_links[PF_UNSPEC][type]); + + if (link->dumpit == NULL) + goto err_inval; + + /* Super-user locks all the tables to get atomic snapshot */ + if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN) + && nlh->nlmsg_flags&NLM_F_ATOMIC) + atomic_inc(&rtnl_rlockct); + if ((*errp = netlink_dump_start(rtnl, skb, nlh, + link->dumpit, + rtnetlink_done)) != 0) { + if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN) && nlh->nlmsg_flags&NLM_F_ATOMIC) + atomic_dec(&rtnl_rlockct); + return -1; + } + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return -1; + } + + if (kind != 2) { + if (rtnl_exlock_nowait()) { + *errp = 0; + return -1; + } + exclusive = 1; + } + + memset(&rta, 0, sizeof(rta)); + + min_len = rtm_min[sz_idx]; + if (nlh->nlmsg_len < min_len) + goto err_inval; + + if (nlh->nlmsg_len > min_len) { + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); + + while (RTA_OK(attr, attrlen)) { + unsigned flavor = attr->rta_type; + if (flavor) { + if (flavor > rta_max[sz_idx]) + goto err_inval; + rta[flavor-1] = attr; + } + attr = RTA_NEXT(attr, attrlen); + } + } + + if (link->doit == NULL) + link = &(rtnetlink_links[PF_UNSPEC][type]); + if (link->doit == NULL) + goto err_inval; + err = link->doit(skb, nlh, (void *)&rta); + + if (exclusive) + rtnl_exunlock(); + *errp = err; + return err; + +err_inval: + if (exclusive) + rtnl_exunlock(); + *errp = -EINVAL; + return -1; +} + +/* + * Process one packet of messages. + * Malformed skbs with wrong lengths of messages are discarded silently. + */ + +extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr * nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (rtnetlink_rcv_msg(skb, nlh, &err)) { + /* Not error, but we must interrupt processing here: + * Note, that in this case we do not pull message + * from skb, it will be processed later. 
+ */ + if (err == 0) + return -1; + netlink_ack(skb, nlh, err); + } else if (nlh->nlmsg_flags&NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +/* + * rtnetlink input queue processing routine: + * - try to acquire shared lock. If it is failed, defer processing. + * - feed skbs to rtnetlink_rcv_skb, until it refuse a message, + * that will occur, when a dump started and/or acquisition of + * exclusive lock failed. + */ + +static void rtnetlink_rcv(struct sock *sk, int len) +{ + struct sk_buff *skb; + + if (rtnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + if (rtnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + rtnl_shunlock(); +} + +static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +{ + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_ifinfo, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_all, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_all, }, + { NULL, NULL, }, + + { neigh_add, NULL, }, + { neigh_delete, NULL, }, + { NULL, neigh_dump_info, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, +}; + + +static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + switch (event) { + case NETDEV_UNREGISTER: + rtmsg_ifinfo(RTM_DELLINK, dev); + break; + default: + rtmsg_ifinfo(RTM_NEWLINK, dev); + break; + } + return NOTIFY_DONE; +} + +struct notifier_block rtnetlink_dev_notifier = { + rtnetlink_event, + NULL, + 0 +}; + + +__initfunc(void rtnetlink_init(void)) +{ +#ifdef RTNL_DEBUG + printk("Initializing RT netlink socket\n"); +#endif + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + if (rtnl == NULL) + panic("rtnetlink_init: cannot initialize rtnetlink\n"); + register_netdevice_notifier(&rtnetlink_dev_notifier); + rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; + rtnetlink_links[PF_PACKET] = link_rtnetlink_table; +} + + + +#endif diff --git a/pfinet/linux-src/net/core/scm.c b/pfinet/linux-src/net/core/scm.c new file mode 100644 index 00000000..cdb5f3d0 --- /dev/null +++ b/pfinet/linux-src/net/core/scm.c @@ -0,0 +1,280 @@ +/* scm.c - Socket level control messages processing. + * + * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * Alignment and value checking mods by Craig Metz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/socket.h> +#include <linux/file.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include <linux/inet.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/rarp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/scm.h> + + +/* + * Only allow a user to send credentials, that they could set with + * setu(g)id. 
+ */ + +static __inline__ int scm_check_creds(struct ucred *creds) +{ + if ((creds->pid == current->pid || capable(CAP_SYS_ADMIN)) && + ((creds->uid == current->uid || creds->uid == current->euid || + creds->uid == current->suid) || capable(CAP_SETUID)) && + ((creds->gid == current->gid || creds->gid == current->egid || + creds->gid == current->sgid) || capable(CAP_SETGID))) { + return 0; + } + return -EPERM; +} + +static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) +{ + int *fdp = (int*)CMSG_DATA(cmsg); + struct scm_fp_list *fpl = *fplp; + struct file **fpp; + int i, num; + + num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int); + + if (num <= 0) + return 0; + + if (num > SCM_MAX_FD) + return -EINVAL; + + if (!fpl) + { + fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + *fplp = fpl; + fpl->count = 0; + } + fpp = &fpl->fp[fpl->count]; + + if (fpl->count + num > SCM_MAX_FD) + return -EINVAL; + + /* + * Verify the descriptors and increment the usage count. + */ + + for (i=0; i< num; i++) + { + int fd = fdp[i]; + struct file *file; + + if (fd < 0 || !(file = fget(fd))) + return -EBADF; + *fpp++ = file; + fpl->count++; + } + return num; +} + +void __scm_destroy(struct scm_cookie *scm) +{ + struct scm_fp_list *fpl = scm->fp; + int i; + + if (fpl) { + scm->fp = NULL; + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } +} + +int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) +{ + struct cmsghdr *cmsg; + int err; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) + { + err = -EINVAL; + + /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ + /* The first check was omitted in <= 2.2.5. The reasoning was + that parser checks cmsg_len in any case, so that + additional check would be work duplication. + But if cmsg_level is not SOL_SOCKET, we do not check + for too short ancillary data object at all! Oops. + OK, let's add it... + */ + if (cmsg->cmsg_len < sizeof(struct cmsghdr) || + (unsigned long)(((char*)cmsg - (char*)msg->msg_control) + + cmsg->cmsg_len) > msg->msg_controllen) + goto error; + + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + + switch (cmsg->cmsg_type) + { + case SCM_RIGHTS: + err=scm_fp_copy(cmsg, &p->fp); + if (err<0) + goto error; + break; + case SCM_CREDENTIALS: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) + goto error; + memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); + err = scm_check_creds(&p->creds); + if (err) + goto error; + break; + default: + goto error; + } + } + + if (p->fp && !p->fp->count) + { + kfree(p->fp); + p->fp = NULL; + } + + err = -EINVAL; + if (msg->msg_flags & MSG_CTLFLAGS) + goto error; + + return 0; + +error: + scm_destroy(p); + return err; +} + +int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) +{ + struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; + struct cmsghdr cmhdr; + int cmlen = CMSG_LEN(len); + int err; + + if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { + msg->msg_flags |= MSG_CTRUNC; + return 0; /* XXX: return error? check spec. 
*/ + } + if (msg->msg_controllen < cmlen) { + msg->msg_flags |= MSG_CTRUNC; + cmlen = msg->msg_controllen; + } + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + + err = -EFAULT; + if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) + goto out; + if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) + goto out; + cmlen = CMSG_SPACE(len); + msg->msg_control += cmlen; + msg->msg_controllen -= cmlen; + err = 0; +out: + return err; +} + +void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) +{ + struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; + + int fdmax = (msg->msg_controllen - sizeof(struct cmsghdr))/sizeof(int); + int fdnum = scm->fp->count; + struct file **fp = scm->fp->fp; + int *cmfptr; + int err = 0, i; + + if (fdnum < fdmax) + fdmax = fdnum; + + for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) + { + int new_fd; + err = get_unused_fd(); + if (err < 0) + break; + new_fd = err; + err = put_user(new_fd, cmfptr); + if (err) { + put_unused_fd(new_fd); + break; + } + /* Bump the usage count and install the file. */ + fp[i]->f_count++; + current->files->fd[new_fd] = fp[i]; + } + + if (i > 0) + { + int cmlen = CMSG_LEN(i*sizeof(int)); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SCM_RIGHTS, &cm->cmsg_type); + if (!err) + err = put_user(cmlen, &cm->cmsg_len); + if (!err) { + cmlen = CMSG_SPACE(i*sizeof(int)); + msg->msg_control += cmlen; + msg->msg_controllen -= cmlen; + } + } + if (i < fdnum) + msg->msg_flags |= MSG_CTRUNC; + + /* + * All of the files that fit in the message have had their + * usage counts incremented, so we just free the list. + */ + __scm_destroy(scm); +} + +struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) +{ + struct scm_fp_list *new_fpl; + int i; + + if (!fpl) + return NULL; + + new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); + if (new_fpl) { + memcpy(new_fpl, fpl, sizeof(*fpl)); + + for (i=fpl->count-1; i>=0; i--) + fpl->fp[i]->f_count++; + } + return new_fpl; +} diff --git a/pfinet/linux-src/net/core/skbuff.c b/pfinet/linux-src/net/core/skbuff.c new file mode 100644 index 00000000..b7636437 --- /dev/null +++ b/pfinet/linux-src/net/core/skbuff.c @@ -0,0 +1,385 @@ +/* + * Routines having to do with the 'struct sk_buff' memory handlers. + * + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Florian La Roche <rzsfl@rz.uni-sb.de> + * + * Version: $Id: skbuff.c,v 1.55 1999/02/23 08:12:27 davem Exp $ + * + * Fixes: + * Alan Cox : Fixed the worst of the load balancer bugs. + * Dave Platt : Interrupt stacking fix. + * Richard Kooijman : Timestamp fixes. + * Alan Cox : Changed buffer format. + * Alan Cox : destructor hook for AF_UNIX etc. + * Linus Torvalds : Better skb_clone. + * Alan Cox : Added skb_copy. + * Alan Cox : Added all the changed routines Linus + * only put in the headers + * Ray VanTassle : Fixed --skb->lock in free + * Alan Cox : skb_copy copy arp field + * Andi Kleen : slabified it. + * + * NOTE: + * The __skb_ routines should be called with interrupts + * disabled, or you better be *real* sure that the operation is atomic + * with respect to whatever list is being frobbed (e.g. via lock_sock() + * or via disabling bottom half handlers, etc). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +/* + * The functions in this file will not compile correctly with gcc 2.4.x + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/malloc.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/dst.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/sock.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +/* + * Skb list spinlock + */ +spinlock_t skb_queue_lock = SPIN_LOCK_UNLOCKED; + +/* + * Resource tracking variables + */ + +static atomic_t net_skbcount = ATOMIC_INIT(0); +static atomic_t net_allocs = ATOMIC_INIT(0); +static atomic_t net_fails = ATOMIC_INIT(0); + +extern atomic_t ip_frag_mem; + +static kmem_cache_t *skbuff_head_cache; + +/* + * Keep out-of-line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always + * reliable. + */ + +void skb_over_panic(struct sk_buff *skb, int sz, void *here) +{ + panic("skput:over: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); +} + +void skb_under_panic(struct sk_buff *skb, int sz, void *here) +{ + panic("skput:under: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); +} + +void show_net_buffers(void) +{ + printk("Networking buffers in use : %u\n", + atomic_read(&net_skbcount)); + printk("Total network buffer allocations : %u\n", + atomic_read(&net_allocs)); + printk("Total failed network buffer allocs : %u\n", + atomic_read(&net_fails)); +#ifdef CONFIG_INET + printk("IP fragment buffer size : %u\n", + atomic_read(&ip_frag_mem)); +#endif +} + +/* Allocate a new skbuff. We do this ourselves so we can fill in a few + * 'private' fields and also do memory statistics to find all the + * [BEEP] leaks. + * + */ + +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + static int count = 0; + if (++count < 5) { + printk(KERN_ERR "alloc_skb called nonatomically " + "from interrupt %p\n", __builtin_return_address(0)); + } + gfp_mask &= ~__GFP_WAIT; + } + + /* Get the HEAD */ + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (skb == NULL) + goto nohead; + + /* Get the DATA. Size must match skb_add_mtu(). */ + size = ((size + 15) & ~15); + data = kmalloc(size + sizeof(atomic_t), gfp_mask); + if (data == NULL) + goto nodata; + + /* Note that this counter is useless now - you can just look in the + * skbuff_head entry in /proc/slabinfo. We keep it only for emergency + * cases. + */ + atomic_inc(&net_allocs); + + skb->truesize = size; + + atomic_inc(&net_skbcount); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->is_clone = 0; + skb->cloned = 0; + + atomic_set(&skb->users, 1); + atomic_set(skb_datarefp(skb), 1); + return skb; + +nodata: + kmem_cache_free(skbuff_head_cache, skb); +nohead: + atomic_inc(&net_fails); + return NULL; +} + + +/* + * Slab constructor for a skb head. 
+ */ +static inline void skb_headerinit(void *p, kmem_cache_t *cache, + unsigned long flags) +{ + struct sk_buff *skb = p; + + skb->destructor = NULL; + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->pkt_bridged = 0; /* Not bridged */ + skb->prev = skb->next = NULL; + skb->list = NULL; + skb->sk = NULL; + skb->stamp.tv_sec=0; /* No idea about time */ + skb->ip_summed = 0; + skb->security = 0; /* By default packets are insecure */ + skb->dst = NULL; +#ifdef CONFIG_IP_FIREWALL + skb->fwmark = 0; +#endif + memset(skb->cb, 0, sizeof(skb->cb)); + skb->priority = 0; +} + +/* + * Free an skbuff by memory without cleaning the state. + */ +void kfree_skbmem(struct sk_buff *skb) +{ + if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb))) + kfree(skb->head); + + kmem_cache_free(skbuff_head_cache, skb); + atomic_dec(&net_skbcount); +} + +/* + * Free an sk_buff. Release anything attached to the buffer. Clean the state. + */ + +void __kfree_skb(struct sk_buff *skb) +{ + if (skb->list) + printk(KERN_WARNING "Warning: kfree_skb passed an skb still " + "on a list (from %p).\n", __builtin_return_address(0)); + + dst_release(skb->dst); + if(skb->destructor) + skb->destructor(skb); + skb_headerinit(skb, NULL, 0); /* clean state */ + kfree_skbmem(skb); +} + +/* + * Duplicate an sk_buff. The new one is not owned by a socket. + */ + +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + + memcpy(n, skb, sizeof(*n)); + atomic_inc(skb_datarefp(skb)); + skb->cloned = 1; + + atomic_inc(&net_allocs); + atomic_inc(&net_skbcount); + dst_clone(n->dst); + n->cloned = 1; + n->next = n->prev = NULL; + n->list = NULL; + n->sk = NULL; + n->is_clone = 1; + atomic_set(&n->users, 1); + n->destructor = NULL; + return n; +} + +/* + * This is slower, and copies the whole data area + */ + +struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + unsigned long offset; + + /* + * Allocate the copy buffer + */ + + n=alloc_skb(skb->end - skb->head, gfp_mask); + if(n==NULL) + return NULL; + + /* + * Shift between the two data areas in bytes + */ + + offset=n->head-skb->head; + + /* Set the data pointer */ + skb_reserve(n,skb->data-skb->head); + /* Set the tail pointer and length */ + skb_put(n,skb->len); + /* Copy the bytes */ + memcpy(n->head,skb->head,skb->end-skb->head); + n->csum = skb->csum; + n->list=NULL; + n->sk=NULL; + n->dev=skb->dev; + n->priority=skb->priority; + n->protocol=skb->protocol; + n->dst=dst_clone(skb->dst); + n->h.raw=skb->h.raw+offset; + n->nh.raw=skb->nh.raw+offset; + n->mac.raw=skb->mac.raw+offset; + memcpy(n->cb, skb->cb, sizeof(skb->cb)); + n->used=skb->used; + n->is_clone=0; + atomic_set(&n->users, 1); + n->pkt_type=skb->pkt_type; + n->stamp=skb->stamp; + n->destructor = NULL; + n->security=skb->security; +#ifdef CONFIG_IP_FIREWALL + n->fwmark = skb->fwmark; +#endif + return n; +} + +struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) +{ + struct sk_buff *n; + unsigned long offset; + int headroom = skb_headroom(skb); + + /* + * Allocate the copy buffer + */ + + n=alloc_skb(skb->truesize+newheadroom-headroom, GFP_ATOMIC); + if(n==NULL) + return NULL; + + skb_reserve(n,newheadroom); + + /* + * Shift between the two data areas in bytes + */ + + offset=n->data-skb->data; + + /* Set the tail pointer and length */ + skb_put(n,skb->len); + /* Copy the bytes */ + memcpy(n->data,skb->data,skb->len); + n->list=NULL; + n->sk=NULL; + 
n->priority=skb->priority; + n->protocol=skb->protocol; + n->dev=skb->dev; + n->dst=dst_clone(skb->dst); + n->h.raw=skb->h.raw+offset; + n->nh.raw=skb->nh.raw+offset; + n->mac.raw=skb->mac.raw+offset; + memcpy(n->cb, skb->cb, sizeof(skb->cb)); + n->used=skb->used; + n->is_clone=0; + atomic_set(&n->users, 1); + n->pkt_type=skb->pkt_type; + n->stamp=skb->stamp; + n->destructor = NULL; + n->security=skb->security; +#ifdef CONFIG_IP_FIREWALL + n->fwmark = skb->fwmark; +#endif + + return n; +} + +#if 0 +/* + * Tune the memory allocator for a new MTU size. + */ +void skb_add_mtu(int mtu) +{ + /* Must match allocation in alloc_skb */ + mtu = ((mtu + 15) & ~15) + sizeof(atomic_t); + + kmem_add_cache_size(mtu); +} +#endif + +void __init skb_init(void) +{ + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + sizeof(struct sk_buff), + 0, + SLAB_HWCACHE_ALIGN, + skb_headerinit, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); +} diff --git a/pfinet/linux-src/net/core/sock.c b/pfinet/linux-src/net/core/sock.c new file mode 100644 index 00000000..e0eb41a0 --- /dev/null +++ b/pfinet/linux-src/net/core/sock.c @@ -0,0 +1,1051 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic socket support routines. Memory allocators, socket lock/release + * handler for protocols to use and generic option handler. + * + * + * Version: $Id: sock.c,v 1.80 1999/05/08 03:04:34 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Florian La Roche, <flla@stud.uni-sb.de> + * Alan Cox, <A.Cox@swansea.ac.uk> + * + * Fixes: + * Alan Cox : Numerous verify_area() problems + * Alan Cox : Connecting on a connecting socket + * now returns an error for tcp. + * Alan Cox : sock->protocol is set correctly. + * and is not sometimes left as 0. + * Alan Cox : connect handles icmp errors on a + * connect properly. Unfortunately there + * is a restart syscall nasty there. I + * can't match BSD without hacking the C + * library. Ideas urgently sought! + * Alan Cox : Disallow bind() to addresses that are + * not ours - especially broadcast ones!! + * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) + * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, + * instead they leave that for the DESTROY timer. + * Alan Cox : Clean up error flag in accept + * Alan Cox : TCP ack handling is buggy, the DESTROY timer + * was buggy. Put a remove_sock() in the handler + * for memory when we hit 0. Also altered the timer + * code. The ACK stuff can wait and needs major + * TCP layer surgery. + * Alan Cox : Fixed TCP ack bug, removed remove sock + * and fixed timer/inet_bh race. + * Alan Cox : Added zapped flag for TCP + * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code + * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb + * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources + * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. + * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... + * Rick Sladkey : Relaxed UDP rules for matching packets. + * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support + * Pauline Middelink : identd support + * Alan Cox : Fixed connect() taking signals I think. 
+ * Alan Cox : SO_LINGER supported + * Alan Cox : Error reporting fixes + * Anonymous : inet_create tidied up (sk->reuse setting) + * Alan Cox : inet sockets don't set sk->type! + * Alan Cox : Split socket option code + * Alan Cox : Callbacks + * Alan Cox : Nagle flag for Charles & Johannes stuff + * Alex : Removed restriction on inet fioctl + * Alan Cox : Splitting INET from NET core + * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() + * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code + * Alan Cox : Split IP from generic code + * Alan Cox : New kfree_skbmem() + * Alan Cox : Make SO_DEBUG superuser only. + * Alan Cox : Allow anyone to clear SO_DEBUG + * (compatibility fix) + * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. + * Alan Cox : Allocator for a socket is settable. + * Alan Cox : SO_ERROR includes soft errors. + * Alan Cox : Allow NULL arguments on some SO_ opts + * Alan Cox : Generic socket allocation to make hooks + * easier (suggested by Craig Metz). + * Michael Pall : SO_ERROR returns positive errno again + * Steve Whitehouse: Added default destructor to free + * protocol private data. + * Steve Whitehouse: Added various other default routines + * common to several socket families. + * Chris Evans : Call suser() check last on F_SETOWN + * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. + * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() + * Andi Kleen : Fix write_space callback + * + * To Fix: + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/poll.h> +#include <linux/init.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/arp.h> +#include <net/rarp.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/raw.h> +#include <net/icmp.h> +#include <linux/ipsec.h> + +#ifdef CONFIG_FILTER +#include <linux/filter.h> +#endif + +#define min(a,b) ((a)<(b)?(a):(b)) + +/* Run time adjustable parameters. */ +__u32 sysctl_wmem_max = SK_WMEM_MAX; +__u32 sysctl_rmem_max = SK_RMEM_MAX; +__u32 sysctl_wmem_default = SK_WMEM_MAX; +__u32 sysctl_rmem_default = SK_RMEM_MAX; + +/* Maximal space eaten by iovec or ancilliary data plus some space */ +int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); + +/* + * This is meant for all protocols to use and covers goings on + * at the socket level. Everything here is generic. + */ + +int sock_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + struct sock *sk=sock->sk; +#ifdef CONFIG_FILTER + struct sk_filter *filter; +#endif + int val; + int valbool; + int err; + struct linger ling; + int ret = 0; + + /* + * Options without arguments + */ + +#ifdef SO_DONTLINGER /* Compatibility item... 
*/ + switch(optname) + { + case SO_DONTLINGER: + sk->linger=0; + return 0; + } +#endif + + if(optlen<sizeof(int)) + return(-EINVAL); + + err = get_user(val, (int *)optval); + if (err) + return err; + + valbool = val?1:0; + + switch(optname) + { + case SO_DEBUG: + if(val && !capable(CAP_NET_ADMIN)) + { + ret = -EACCES; + } + else + sk->debug=valbool; + break; + case SO_REUSEADDR: + sk->reuse = valbool; + break; + case SO_TYPE: + case SO_ERROR: + ret = -ENOPROTOOPT; + break; + case SO_DONTROUTE: + sk->localroute=valbool; + break; + case SO_BROADCAST: + sk->broadcast=valbool; + break; + case SO_SNDBUF: + /* Don't error on this BSD doesn't and if you think + about it this is right. Otherwise apps have to + play 'guess the biggest size' games. RCVBUF/SNDBUF + are treated in BSD as hints */ + + if (val > sysctl_wmem_max) + val = sysctl_wmem_max; + + sk->sndbuf = max(val*2,2048); + + /* + * Wake up sending tasks if we + * upped the value. + */ + sk->write_space(sk); + break; + + case SO_RCVBUF: + /* Don't error on this BSD doesn't and if you think + about it this is right. Otherwise apps have to + play 'guess the biggest size' games. RCVBUF/SNDBUF + are treated in BSD as hints */ + + if (val > sysctl_rmem_max) + val = sysctl_rmem_max; + + /* FIXME: is this lower bound the right one? */ + sk->rcvbuf = max(val*2,256); + break; + + case SO_KEEPALIVE: +#ifdef CONFIG_INET + if (sk->protocol == IPPROTO_TCP) + { + tcp_set_keepalive(sk, valbool); + } +#endif + sk->keepopen = valbool; + break; + + case SO_OOBINLINE: + sk->urginline = valbool; + break; + + case SO_NO_CHECK: + sk->no_check = valbool; + break; + + case SO_PRIORITY: + if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) + sk->priority = val; + else + return(-EPERM); + break; + + case SO_LINGER: + if(optlen<sizeof(ling)) + return -EINVAL; /* 1003.1g */ + err = copy_from_user(&ling,optval,sizeof(ling)); + if (err) + { + ret = -EFAULT; + break; + } + if(ling.l_onoff==0) + sk->linger=0; + else + { + sk->lingertime=ling.l_linger; + sk->linger=1; + } + break; + + case SO_BSDCOMPAT: + sk->bsdism = valbool; + break; + + case SO_PASSCRED: + sock->passcred = valbool; + break; + + +#ifdef CONFIG_NETDEVICES + case SO_BINDTODEVICE: + { + char devname[IFNAMSIZ]; + + /* Sorry... */ + if (!capable(CAP_NET_RAW)) + return -EPERM; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + + if (!valbool) { + sk->bound_dev_if = 0; + } else { + if (optlen > IFNAMSIZ) + optlen = IFNAMSIZ; + if (copy_from_user(devname, optval, optlen)) + return -EFAULT; + + /* Remove any cached route for this socket. 
*/ + lock_sock(sk); + dst_release(xchg(&sk->dst_cache, NULL)); + release_sock(sk); + + if (devname[0] == '\0') { + sk->bound_dev_if = 0; + } else { + struct device *dev = dev_get(devname); + if (!dev) + return -EINVAL; + sk->bound_dev_if = dev->ifindex; + } + return 0; + } + } +#endif + + +#ifdef CONFIG_FILTER + case SO_ATTACH_FILTER: + ret = -EINVAL; + if (optlen == sizeof(struct sock_fprog)) { + struct sock_fprog fprog; + + ret = -EFAULT; + if (copy_from_user(&fprog, optval, sizeof(fprog))) + break; + + ret = sk_attach_filter(&fprog, sk); + } + break; + + case SO_DETACH_FILTER: + filter = sk->filter; + if(filter) { + sk->filter = NULL; + synchronize_bh(); + sk_filter_release(sk, filter); + return 0; + } + return -ENOENT; +#endif + /* We implement the SO_SNDLOWAT etc to + not be settable (1003.1g 5.3) */ + default: + return(-ENOPROTOOPT); + } + return ret; +} + + +int sock_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + struct sock *sk = sock->sk; + + union + { + int val; + struct linger ling; + struct timeval tm; + } v; + + int lv=sizeof(int),len; + + if(get_user(len,optlen)) + return -EFAULT; + + switch(optname) + { + case SO_DEBUG: + v.val = sk->debug; + break; + + case SO_DONTROUTE: + v.val = sk->localroute; + break; + + case SO_BROADCAST: + v.val= sk->broadcast; + break; + + case SO_SNDBUF: + v.val=sk->sndbuf; + break; + + case SO_RCVBUF: + v.val =sk->rcvbuf; + break; + + case SO_REUSEADDR: + v.val = sk->reuse; + break; + + case SO_KEEPALIVE: + v.val = sk->keepopen; + break; + + case SO_TYPE: + v.val = sk->type; + break; + + case SO_ERROR: + v.val = -sock_error(sk); + if(v.val==0) + v.val=xchg(&sk->err_soft,0); + break; + + case SO_OOBINLINE: + v.val = sk->urginline; + break; + + case SO_NO_CHECK: + v.val = sk->no_check; + break; + + case SO_PRIORITY: + v.val = sk->priority; + break; + + case SO_LINGER: + lv=sizeof(v.ling); + v.ling.l_onoff=sk->linger; + v.ling.l_linger=sk->lingertime; + break; + + case SO_BSDCOMPAT: + v.val = sk->bsdism; + break; + + case SO_RCVTIMEO: + case SO_SNDTIMEO: + lv=sizeof(struct timeval); + v.tm.tv_sec=0; + v.tm.tv_usec=0; + break; + + case SO_RCVLOWAT: + case SO_SNDLOWAT: + v.val=1; + break; + + case SO_PASSCRED: + v.val = sock->passcred; + break; + + case SO_PEERCRED: + lv=sizeof(sk->peercred); + len=min(len, lv); + if(copy_to_user((void*)optval, &sk->peercred, len)) + return -EFAULT; + goto lenout; + + default: + return(-ENOPROTOOPT); + } + len=min(len,lv); + if(copy_to_user(optval,&v,len)) + return -EFAULT; +lenout: + if(put_user(len, optlen)) + return -EFAULT; + return 0; +} + +static kmem_cache_t *sk_cachep; + +/* + * All socket objects are allocated here. This is for future + * usage. 
+ */ + +struct sock *sk_alloc(int family, int priority, int zero_it) +{ + struct sock *sk = kmem_cache_alloc(sk_cachep, priority); + + if(sk) { + if (zero_it) + memset(sk, 0, sizeof(struct sock)); + sk->family = family; + } + + return sk; +} + +void sk_free(struct sock *sk) +{ +#ifdef CONFIG_FILTER + struct sk_filter *filter; +#endif + if (sk->destruct) + sk->destruct(sk); + +#ifdef CONFIG_FILTER + filter = sk->filter; + if (filter) { + sk_filter_release(sk, filter); + sk->filter = NULL; + } +#endif + + if (atomic_read(&sk->omem_alloc)) + printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc)); + + kmem_cache_free(sk_cachep, sk); +} + +void __init sk_init(void) +{ + sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0, + SLAB_HWCACHE_ALIGN, 0, 0); + +} + +/* + * Simple resource managers for sockets. + */ + + +/* + * Write buffer destructor automatically called from kfree_skb. + */ +void sock_wfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + /* In case it might be waiting for more memory. */ + atomic_sub(skb->truesize, &sk->wmem_alloc); + sk->write_space(sk); +} + +/* + * Read buffer destructor automatically called from kfree_skb. + */ +void sock_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->rmem_alloc); +} + + +/* + * Allocate a skb from the socket's send buffer. + */ +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +{ + if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) { + struct sk_buff * skb = alloc_skb(size, priority); + if (skb) { + atomic_add(skb->truesize, &sk->wmem_alloc); + skb->destructor = sock_wfree; + skb->sk = sk; + return skb; + } + } + return NULL; +} + +/* + * Allocate a skb from the socket's receive buffer. + */ +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +{ + if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) { + struct sk_buff *skb = alloc_skb(size, priority); + if (skb) { + atomic_add(skb->truesize, &sk->rmem_alloc); + skb->destructor = sock_rfree; + skb->sk = sk; + return skb; + } + } + return NULL; +} + +/* + * Allocate a memory block from the socket's option memory buffer. + */ +void *sock_kmalloc(struct sock *sk, int size, int priority) +{ + if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { + void *mem; + /* First do the add, to avoid the race if kmalloc + * might sleep. + */ + atomic_add(size, &sk->omem_alloc); + mem = kmalloc(size, priority); + if (mem) + return mem; + atomic_sub(size, &sk->omem_alloc); + } + return NULL; +} + +/* + * Free an option memory block. + */ +void sock_kfree_s(struct sock *sk, void *mem, int size) +{ + kfree_s(mem, size); + atomic_sub(size, &sk->omem_alloc); +} + +/* FIXME: this is insane. We are trying suppose to be controlling how + * how much space we have for data bytes, not packet headers. + * This really points out that we need a better system for doing the + * receive buffer. -- erics + * WARNING: This is currently ONLY used in tcp. If you need it else where + * this will probably not be what you want. Possibly these two routines + * should move over to the ipv4 directory. + */ +unsigned long sock_rspace(struct sock *sk) +{ + int amt = 0; + + if (sk != NULL) { + /* This used to have some bizarre complications that + * to attempt to reserve some amount of space. 
This doesn't + * make sense, since the number returned here does not + * actually reflect allocated space, but rather the amount + * of space we committed to. We gamble that we won't + * run out of memory, and returning a smaller number does + * not change the gamble. If we lose the gamble tcp still + * works, it may just slow down for retransmissions. + */ + amt = sk->rcvbuf - atomic_read(&sk->rmem_alloc); + if (amt < 0) + amt = 0; + } + return amt; +} + + +/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. + I think, these locks should be removed for datagram sockets. + */ +static void sock_wait_for_wmem(struct sock * sk) +{ + struct wait_queue wait = { current, NULL }; + + sk->socket->flags &= ~SO_NOSPACE; + add_wait_queue(sk->sleep, &wait); + for (;;) { + if (signal_pending(current)) + break; + current->state = TASK_INTERRUPTIBLE; + if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) + break; + if (sk->shutdown & SEND_SHUTDOWN) + break; + if (sk->err) + break; + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); +} + + +/* + * Generic send/receive buffer handlers + */ + +struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, + unsigned long fallback, int noblock, int *errcode) +{ + int err; + struct sk_buff *skb; + + while (1) { + unsigned long try_size = size; + + err = sock_error(sk); + if (err != 0) + goto failure; + + /* + * We should send SIGPIPE in these cases according to + * 1003.1g draft 6.4. If we (the user) did a shutdown() + * call however we should not. + * + * Note: This routine isnt just used for datagrams and + * anyway some datagram protocols have a notion of + * close down. + */ + + err = -EPIPE; + if (sk->shutdown&SEND_SHUTDOWN) + goto failure; + + if (fallback) { + /* The buffer get won't block, or use the atomic queue. + * It does produce annoying no free page messages still. + */ + skb = sock_wmalloc(sk, size, 0, GFP_BUFFER); + if (skb) + break; + try_size = fallback; + } + skb = sock_wmalloc(sk, try_size, 0, sk->allocation); + if (skb) + break; + + /* + * This means we have too many buffers for this socket already. + */ + + sk->socket->flags |= SO_NOSPACE; + err = -EAGAIN; + if (noblock) + goto failure; + err = -ERESTARTSYS; + if (signal_pending(current)) + goto failure; + sock_wait_for_wmem(sk); + } + + return skb; + +failure: + *errcode = err; + return NULL; +} + + +void __release_sock(struct sock *sk) +{ +#ifdef CONFIG_INET + if (!sk->prot || !sk->backlog_rcv) + return; + + /* See if we have any packets built up. */ + start_bh_atomic(); + while (!skb_queue_empty(&sk->back_log)) { + struct sk_buff * skb = sk->back_log.next; + __skb_unlink(skb, &sk->back_log); + sk->backlog_rcv(sk, skb); + } + end_bh_atomic(); +#endif +} + + +/* + * Generic socket manager library. Most simpler socket families + * use this to manage their socket lists. At some point we should + * hash these. By making this generic we get the lot hashed for free. + */ + +void sklist_remove_socket(struct sock **list, struct sock *sk) +{ + struct sock *s; + + start_bh_atomic(); + + s= *list; + if(s==sk) + { + *list = s->next; + end_bh_atomic(); + return; + } + while(s && s->next) + { + if(s->next==sk) + { + s->next=sk->next; + break; + } + s=s->next; + } + end_bh_atomic(); +} + +void sklist_insert_socket(struct sock **list, struct sock *sk) +{ + start_bh_atomic(); + sk->next= *list; + *list=sk; + end_bh_atomic(); +} + +/* + * This is only called from user mode. 
Thus it protects itself against + * interrupt users but doesn't worry about being called during work. + * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. + */ + +void sklist_destroy_socket(struct sock **list, struct sock *sk); + +/* + * Handler for deferred kills. + */ + +static void sklist_destroy_timer(unsigned long data) +{ + struct sock *sk=(struct sock *)data; + sklist_destroy_socket(NULL,sk); +} + +/* + * Destroy a socket. We pass NULL for a list if we know the + * socket is not on a list. + */ + +void sklist_destroy_socket(struct sock **list,struct sock *sk) +{ + struct sk_buff *skb; + if(list) + sklist_remove_socket(list, sk); + + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) + { + kfree_skb(skb); + } + + if(atomic_read(&sk->wmem_alloc) == 0 && + atomic_read(&sk->rmem_alloc) == 0 && + sk->dead) + { + sk_free(sk); + } + else + { + /* + * Someone is using our buffers still.. defer + */ + init_timer(&sk->timer); + sk->timer.expires=jiffies+SOCK_DESTROY_TIME; + sk->timer.function=sklist_destroy_timer; + sk->timer.data = (unsigned long)sk; + add_timer(&sk->timer); + } +} + +/* + * Set of default routines for initialising struct proto_ops when + * the protocol does not support a particular function. In certain + * cases where it makes no sense for a protocol to have a "do nothing" + * function, some default processing is provided. + */ + +int sock_no_dup(struct socket *newsock, struct socket *oldsock) +{ + struct sock *sk = oldsock->sk; + + return net_families[sk->family]->create(newsock, sk->protocol); +} + +int sock_no_release(struct socket *sock, struct socket *peersock) +{ + return 0; +} + +int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) +{ + return -EOPNOTSUPP; +} + +int sock_no_connect(struct socket *sock, struct sockaddr *saddr, + int len, int flags) +{ + return -EOPNOTSUPP; +} + +int sock_no_socketpair(struct socket *sock1, struct socket *sock2) +{ + return -EOPNOTSUPP; +} + +int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) +{ + return -EOPNOTSUPP; +} + +int sock_no_getname(struct socket *sock, struct sockaddr *saddr, + int *len, int peer) +{ + return -EOPNOTSUPP; +} + +unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) +{ + return 0; +} + +int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +int sock_no_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + +int sock_no_shutdown(struct socket *sock, int how) +{ + return -EOPNOTSUPP; +} + +int sock_no_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + return -EOPNOTSUPP; +} + +int sock_no_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + return -EOPNOTSUPP; +} + +/* + * Note: if you add something that sleeps here then change sock_fcntl() + * to do proper fd locking. + */ +int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch(cmd) + { + case F_SETOWN: + /* + * This is a little restrictive, but it's the only + * way to make sure that you can't send a sigurg to + * another process. 
+ */ + if (current->pgrp != -arg && + current->pid != arg && + !capable(CAP_KILL)) return(-EPERM); + sk->proc = arg; + return(0); + case F_GETOWN: + return(sk->proc); + default: + return(-EINVAL); + } +} + +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags, + struct scm_cookie *scm) +{ + return -EOPNOTSUPP; +} + +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags, + struct scm_cookie *scm) +{ + return -EOPNOTSUPP; +} + + + +/* + * Default Socket Callbacks + */ + +void sock_def_wakeup(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + +void sock_def_error_report(struct sock *sk) +{ + if (!sk->dead) { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket,0); + } +} + +void sock_def_readable(struct sock *sk, int len) +{ + if(!sk->dead) { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket,1); + } +} + +void sock_def_write_space(struct sock *sk) +{ + /* Do not wake up a writer until he can make "significant" + * progress. --DaveM + */ + if(!sk->dead && + ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) { + wake_up_interruptible(sk->sleep); + + /* Should agree with poll, otherwise some programs break */ + if (sock_writeable(sk)) + sock_wake_async(sk->socket, 2); + } +} + +void sock_def_destruct(struct sock *sk) +{ + if (sk->protinfo.destruct_hook) + kfree(sk->protinfo.destruct_hook); +} + +void sock_init_data(struct socket *sock, struct sock *sk) +{ + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->back_log); + skb_queue_head_init(&sk->error_queue); + + init_timer(&sk->timer); + + sk->allocation = GFP_KERNEL; + sk->rcvbuf = sysctl_rmem_default; + sk->sndbuf = sysctl_wmem_default; + sk->state = TCP_CLOSE; + sk->zapped = 1; + sk->socket = sock; + + if(sock) + { + sk->type = sock->type; + sk->sleep = &sock->wait; + sock->sk = sk; + } + + sk->state_change = sock_def_wakeup; + sk->data_ready = sock_def_readable; + sk->write_space = sock_def_write_space; + sk->error_report = sock_def_error_report; + sk->destruct = sock_def_destruct; + + sk->peercred.pid = 0; + sk->peercred.uid = -1; + sk->peercred.gid = -1; + +} diff --git a/pfinet/linux-src/net/core/sysctl_net_core.c b/pfinet/linux-src/net/core/sysctl_net_core.c new file mode 100644 index 00000000..446ca145 --- /dev/null +++ b/pfinet/linux-src/net/core/sysctl_net_core.c @@ -0,0 +1,61 @@ +/* -*- linux-c -*- + * sysctl_net_core.c: sysctl interface to net core subsystem. + * + * Begun April 1, 1996, Mike Shaver. + * Added /proc/sys/net/core directory entry (empty =) ). 
[MS] + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/config.h> + +#ifdef CONFIG_SYSCTL + +extern int netdev_max_backlog; +extern int netdev_fastroute; +extern int net_msg_cost; +extern int net_msg_burst; + +extern __u32 sysctl_wmem_max; +extern __u32 sysctl_rmem_max; +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; + +extern int sysctl_core_destroy_delay; +extern int sysctl_optmem_max; + +ctl_table core_table[] = { +#ifdef CONFIG_NET + {NET_CORE_WMEM_MAX, "wmem_max", + &sysctl_wmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_RMEM_MAX, "rmem_max", + &sysctl_rmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_WMEM_DEFAULT, "wmem_default", + &sysctl_wmem_default, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_RMEM_DEFAULT, "rmem_default", + &sysctl_rmem_default, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_CORE_MAX_BACKLOG, "netdev_max_backlog", + &netdev_max_backlog, sizeof(int), 0644, NULL, + &proc_dointvec}, +#ifdef CONFIG_NET_FASTROUTE + {NET_CORE_FASTROUTE, "netdev_fastroute", + &netdev_fastroute, sizeof(int), 0644, NULL, + &proc_dointvec}, +#endif + {NET_CORE_MSG_COST, "message_cost", + &net_msg_cost, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_CORE_MSG_BURST, "message_burst", + &net_msg_burst, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_CORE_OPTMEM_MAX, "optmem_max", + &sysctl_optmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, +#endif /* CONFIG_NET */ + { 0 } +}; +#endif diff --git a/pfinet/linux-src/net/core/utils.c b/pfinet/linux-src/net/core/utils.c new file mode 100644 index 00000000..415926b8 --- /dev/null +++ b/pfinet/linux-src/net/core/utils.c @@ -0,0 +1,66 @@ +/* + * Generic address resultion entity + * + * Authors: + * net_random Alan Cox + * net_ratelimit Andy Kleen + * + * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> + +static unsigned long net_rand_seed = 152L; + +unsigned long net_random(void) +{ + net_rand_seed=net_rand_seed*69069L+1; + return net_rand_seed^jiffies; +} + +void net_srandom(unsigned long entropy) +{ + net_rand_seed ^= entropy; + net_random(); +} + +int net_msg_cost = 5*HZ; +int net_msg_burst = 10*5*HZ; + +/* + * This enforces a rate limit: not more than one kernel message + * every 5secs to make a denial-of-service attack impossible. + * + * All warning printk()s should be guarded by this function. + */ +int net_ratelimit(void) +{ + static unsigned long toks = 10*5*HZ; + static unsigned long last_msg; + static int missed; + unsigned long now = jiffies; + + toks += now - xchg(&last_msg, now); + if (toks > net_msg_burst) + toks = net_msg_burst; + if (toks >= net_msg_cost) { + toks -= net_msg_cost; + if (missed) + printk(KERN_WARNING "NET: %d messages suppressed.\n", missed); + missed = 0; + return 1; + } + missed++; + return 0; +} diff --git a/pfinet/linux-src/net/ethernet/Makefile b/pfinet/linux-src/net/ethernet/Makefile new file mode 100644 index 00000000..193d6af8 --- /dev/null +++ b/pfinet/linux-src/net/ethernet/Makefile @@ -0,0 +1,33 @@ +# +# Makefile for the Linux Ethernet layer. 
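The net_ratelimit() helper above is a small token bucket: each printed message costs net_msg_cost jiffies' worth of tokens, at most net_msg_burst tokens can accumulate, and refill happens as jiffies advance, so sustained output is capped at roughly one message per five seconds. Its comment asks that every warning printk() on a packet path go through it; a minimal sketch of that caller pattern, assuming only a hypothetical dev pointer in scope (not code from this tree), looks like:

	if (net_ratelimit())
		printk(KERN_WARNING "%s: dropping malformed packet\n", dev->name);

Without the guard, a flood of bad packets could turn the warning itself into a denial of service, which is exactly what the limiter is there to prevent.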
+# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := ethernet.o + +OBJS := eth.o + +ifeq ($(CONFIG_SYSCTL),y) +OBJS += sysctl_net_ether.o +endif + +ifdef CONFIG_IPX +OBJ2 := pe2.o +endif + +ifdef CONFIG_ATALK +OBJ2 := pe2.o +endif + +ifdef CONFIG_NET +O_OBJS := $(OBJS) $(OBJ2) +endif + +include $(TOPDIR)/Rules.make + +tar: + tar -cvf /dev/f1 . diff --git a/pfinet/linux-src/net/ethernet/eth.c b/pfinet/linux-src/net/ethernet/eth.c new file mode 100644 index 00000000..bce35d48 --- /dev/null +++ b/pfinet/linux-src/net/ethernet/eth.c @@ -0,0 +1,298 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Ethernet-type device handling. + * + * Version: @(#)eth.c 1.0.7 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * + * Fixes: + * Mr Linux : Arp problems + * Alan Cox : Generic queue tidyup (very tiny here) + * Alan Cox : eth_header ntohs should be htons + * Alan Cox : eth_rebuild_header missing an htons and + * minor other things. + * Tegge : Arp bug fixes. + * Florian : Removed many unnecessary functions, code cleanup + * and changes for new arp and skbuff. + * Alan Cox : Redid header building to reflect new format. + * Alan Cox : ARP only when compiled with CONFIG_INET + * Greg Page : 802.2 and SNAP stuff. + * Alan Cox : MAC layer pointers/new format. + * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding. + * Alan Cox : Protect against forwarding explosions with + * older network drivers and IFF_ALLMULTI. + * Christer Weinigel : Better rebuild header message. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/config.h> +#include <linux/init.h> +#include <net/dst.h> +#include <net/arp.h> +#include <net/sock.h> +#include <net/ipv6.h> +#include <net/ip.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/checksum.h> + + +__initfunc(void eth_setup(char *str, int *ints)) +{ + struct device *d = dev_base; + + if (!str || !*str) + return; + while (d) + { + if (!strcmp(str,d->name)) + { + if (ints[0] > 0) + d->irq=ints[1]; + if (ints[0] > 1) + d->base_addr=ints[2]; + if (ints[0] > 2) + d->mem_start=ints[3]; + if (ints[0] > 3) + d->mem_end=ints[4]; + break; + } + d=d->next; + } +} + + +/* + * Create the Ethernet MAC header for an arbitrary protocol layer + * + * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) + */ + +int eth_header(struct sk_buff *skb, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); + + /* + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. + */ + + if(type!=ETH_P_802_3) + eth->h_proto = htons(type); + else + eth->h_proto = htons(len); + + /* + * Set the source hardware address. + */ + + if(saddr) + memcpy(eth->h_source,saddr,dev->addr_len); + else + memcpy(eth->h_source,dev->dev_addr,dev->addr_len); + + /* + * Anyway, the loopback-device should never use this function... + */ + + if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) + { + memset(eth->h_dest, 0, dev->addr_len); + return(dev->hard_header_len); + } + + if(daddr) + { + memcpy(eth->h_dest,daddr,dev->addr_len); + return dev->hard_header_len; + } + + return -dev->hard_header_len; +} + + +/* + * Rebuild the Ethernet MAC header. This is called after an ARP + * (or in future other address resolution) has completed on this + * sk_buff. We now let ARP fill in the other fields. + * + * This routine CANNOT use cached dst->neigh! + * Really, it is used only when dst->neigh is wrong. + */ + +int eth_rebuild_header(struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct device *dev = skb->dev; + + switch (eth->h_proto) + { +#ifdef CONFIG_INET + case __constant_htons(ETH_P_IP): + return arp_find(eth->h_dest, skb); +#endif + default: + printk(KERN_DEBUG + "%s: unable to resolve type %X addresses.\n", + dev->name, (int)eth->h_proto); + + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + break; + } + + return 0; +} + + +/* + * Determine the packet's protocol ID. The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. 
+ */ + +unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev) +{ + struct ethhdr *eth; + unsigned char *rawp; + + skb->mac.raw=skb->data; + skb_pull(skb,dev->hard_header_len); + eth= skb->mac.ethernet; + + if(*eth->h_dest&1) + { + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) + skb->pkt_type=PACKET_BROADCAST; + else + skb->pkt_type=PACKET_MULTICAST; + } + + /* + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. + * + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. + */ + + else if(dev->flags&(IFF_PROMISC/*|IFF_ALLMULTI*/)) + { + if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + if (ntohs(eth->h_proto) >= 1536) + return eth->h_proto; + + rawp = skb->data; + + /* + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. + */ + if (*(unsigned short *)rawp == 0xFFFF) + return htons(ETH_P_802_3); + + /* + * Real 802.2 LLC + */ + return htons(ETH_P_802_2); +} + +int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +{ + struct ethhdr *eth = skb->mac.ethernet; + memcpy(haddr, eth->h_source, ETH_ALEN); + return ETH_ALEN; +} + +int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) +{ + unsigned short type = hh->hh_type; + struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2); + struct device *dev = neigh->dev; + + if (type == __constant_htons(ETH_P_802_3)) + return -1; + + eth->h_proto = type; + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, neigh->ha, dev->addr_len); + return 0; +} + +/* + * Called by Address Resolution module to notify changes in address. + */ + +void eth_header_cache_update(struct hh_cache *hh, struct device *dev, unsigned char * haddr) +{ + memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len); +} + +#ifndef CONFIG_IP_ROUTER + +/* + * Copy from an ethernet device memory space to an sk_buff while checksumming if IP + */ + +void eth_copy_and_sum(struct sk_buff *dest, unsigned char *src, int length, int base) +{ + struct ethhdr *eth; + struct iphdr *iph; + int ip_length; + + eth=(struct ethhdr *)src; + if(eth->h_proto!=htons(ETH_P_IP)) + { + memcpy(dest->data,src,length); + return; + } + /* + * We have to watch for padded packets. The csum doesn't include the + * padding, and there is no point in copying the padding anyway. + * We have to use the smaller of length and ip_length because it + * can happen that ip_length > length. 
+ */ + memcpy(dest->data,src,sizeof(struct iphdr)+ETH_HLEN); /* ethernet is always >= 34 */ + length -= sizeof(struct iphdr) + ETH_HLEN; + iph=(struct iphdr*)(src+ETH_HLEN); + ip_length = ntohs(iph->tot_len) - sizeof(struct iphdr); + + /* Also watch out for bogons - min IP size is 8 (rfc-1042) */ + if ((ip_length <= length) && (ip_length > 7)) + length=ip_length; + + dest->csum=csum_partial_copy(src+sizeof(struct iphdr)+ETH_HLEN,dest->data+sizeof(struct iphdr)+ETH_HLEN,length,base); + dest->ip_summed=1; +} + +#endif /* !(CONFIG_IP_ROUTER) */ diff --git a/pfinet/linux-src/net/ethernet/pe2.c b/pfinet/linux-src/net/ethernet/pe2.c new file mode 100644 index 00000000..4915f070 --- /dev/null +++ b/pfinet/linux-src/net/ethernet/pe2.c @@ -0,0 +1,38 @@ +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/datalink.h> +#include <linux/mm.h> +#include <linux/in.h> + +static void +pEII_datalink_header(struct datalink_proto *dl, + struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + + skb->protocol = htons (ETH_P_IPX); + if(dev->hard_header) + dev->hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len); +} + +struct datalink_proto * +make_EII_client(void) +{ + struct datalink_proto *proto; + + proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) { + proto->type_len = 0; + proto->header_length = 0; + proto->datalink_header = pEII_datalink_header; + proto->string_name = "EtherII"; + } + + return proto; +} + +void destroy_EII_client(struct datalink_proto *dl) +{ + if (dl) + kfree_s(dl, sizeof(struct datalink_proto)); +} diff --git a/pfinet/linux-src/net/ethernet/sysctl_net_ether.c b/pfinet/linux-src/net/ethernet/sysctl_net_ether.c new file mode 100644 index 00000000..b81a6d53 --- /dev/null +++ b/pfinet/linux-src/net/ethernet/sysctl_net_ether.c @@ -0,0 +1,13 @@ +/* -*- linux-c -*- + * sysctl_net_ether.c: sysctl interface to net Ethernet subsystem. + * + * Begun April 1, 1996, Mike Shaver. + * Added /proc/sys/net/ether directory entry (empty =) ). [MS] + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> + +ctl_table ether_table[] = { + {0} +}; diff --git a/pfinet/linux-src/net/ipv4/Config.in b/pfinet/linux-src/net/ipv4/Config.in new file mode 100644 index 00000000..29786da5 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/Config.in @@ -0,0 +1,88 @@ +# +# IP configuration +# +bool 'IP: multicasting' CONFIG_IP_MULTICAST +bool 'IP: advanced router' CONFIG_IP_ADVANCED_ROUTER +if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" ]; then + define_bool CONFIG_RTNETLINK y + define_bool CONFIG_NETLINK y + bool 'IP: policy routing' CONFIG_IP_MULTIPLE_TABLES + bool 'IP: equal cost multipath' CONFIG_IP_ROUTE_MULTIPATH + bool 'IP: use TOS value as routing key' CONFIG_IP_ROUTE_TOS + bool 'IP: verbose route monitoring' CONFIG_IP_ROUTE_VERBOSE + bool 'IP: large routing tables' CONFIG_IP_ROUTE_LARGE_TABLES + if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then + bool 'IP: fast network address translation' CONFIG_IP_ROUTE_NAT + fi +fi +bool 'IP: kernel level autoconfiguration' CONFIG_IP_PNP +if [ "$CONFIG_IP_PNP" = "y" ]; then + bool ' BOOTP support' CONFIG_IP_PNP_BOOTP + bool ' RARP support' CONFIG_IP_PNP_RARP +# not yet ready.. 
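make_EII_client() above hands back a minimal struct datalink_proto whose only job is to prepend an Ethernet II header for IPX-over-DIX framing. A purely illustrative sketch of a caller follows; the skb and the six-byte dest_node are assumed to exist, and this is not taken from the IPX code itself:

	/* protocol init time; allocation uses GFP_ATOMIC and may fail */
	struct datalink_proto *p = make_EII_client();

	/* per outgoing frame: stamps skb->protocol with ETH_P_IPX and asks
	 * the device to build its MAC header for dest_node
	 */
	if (p != NULL)
		p->datalink_header(p, skb, dest_node);

	/* protocol teardown; safe to call with NULL */
	destroy_EII_client(p);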
+# bool ' ARP support' CONFIG_IP_PNP_ARP +fi +if [ "$CONFIG_FIREWALL" = "y" ]; then + bool 'IP: firewalling' CONFIG_IP_FIREWALL + if [ "$CONFIG_IP_FIREWALL" = "y" ]; then + if [ "$CONFIG_NETLINK" = "y" ]; then + bool 'IP: firewall packet netlink device' CONFIG_IP_FIREWALL_NETLINK + if [ "$CONFIG_IP_FIREWALL_NETLINK" = "y" ]; then + define_bool CONFIG_NETLINK_DEV y + fi + fi + bool 'IP: always defragment (required for masquerading)' CONFIG_IP_ALWAYS_DEFRAG + if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then + bool 'IP: use FWMARK value as routing key' CONFIG_IP_ROUTE_FWMARK + fi + fi +fi +if [ "$CONFIG_IP_FIREWALL" = "y" ]; then + if [ "$CONFIG_IP_ALWAYS_DEFRAG" != "n" ]; then + bool 'IP: transparent proxy support' CONFIG_IP_TRANSPARENT_PROXY + bool 'IP: masquerading' CONFIG_IP_MASQUERADE + if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then + comment 'Protocol-specific masquerading support will be built as modules.' + bool 'IP: ICMP masquerading' CONFIG_IP_MASQUERADE_ICMP + comment 'Protocol-specific masquerading support will be built as modules.' + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool 'IP: masquerading special modules support' CONFIG_IP_MASQUERADE_MOD + if [ "$CONFIG_IP_MASQUERADE_MOD" = "y" ]; then + tristate 'IP: ipautofw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPAUTOFW + tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW + tristate 'IP: ip fwmark masq-forwarding support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_MFW + fi + fi + fi + fi +fi +bool 'IP: optimize as router not host' CONFIG_IP_ROUTER +tristate 'IP: tunneling' CONFIG_NET_IPIP +tristate 'IP: GRE tunnels over IP' CONFIG_NET_IPGRE +if [ "$CONFIG_IP_MULTICAST" = "y" ]; then + if [ "$CONFIG_NET_IPGRE" != "n" ]; then + bool 'IP: broadcast GRE over IP' CONFIG_NET_IPGRE_BROADCAST + fi + bool 'IP: multicast routing' CONFIG_IP_MROUTE + if [ "$CONFIG_IP_MROUTE" = "y" ]; then + bool 'IP: PIM-SM version 1 support' CONFIG_IP_PIMSM_V1 + bool 'IP: PIM-SM version 2 support' CONFIG_IP_PIMSM_V2 + fi +fi +bool 'IP: aliasing support' CONFIG_IP_ALIAS +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + if [ "$CONFIG_RTNETLINK" = "y" ]; then + bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD + fi +fi +bool 'IP: TCP syncookie support (not enabled per default)' CONFIG_SYN_COOKIES +comment '(it is safe to leave these untouched)' +#bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP +tristate 'IP: Reverse ARP' CONFIG_INET_RARP +#bool 'IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY +#bool 'IP: Disable NAGLE algorithm (normally enabled)' CONFIG_TCP_NAGLE_OFF +bool 'IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB_LARGE +#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then +#bool 'IP: support experimental checksum copy to user for UDP' CONFIG_UDP_DELAY_CSUM +#fi + diff --git a/pfinet/linux-src/net/ipv4/Makefile b/pfinet/linux-src/net/ipv4/Makefile new file mode 100644 index 00000000..8ab280de --- /dev/null +++ b/pfinet/linux-src/net/ipv4/Makefile @@ -0,0 +1,116 @@ +# +# Makefile for the Linux TCP/IP (INET) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +O_TARGET := ipv4.o +IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o \ + ip_input.o ip_fragment.o ip_forward.o ip_options.o \ + ip_output.o ip_sockglue.o \ + tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\ + raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o +IPV4X_OBJS := + +MOD_LIST_NAME := IPV4_MODULES +M_OBJS := + +ifeq ($(CONFIG_IP_FIREWALL),y) +IPV4_OBJS += ip_fw.o +endif + +ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y) +IPV4_OBJS += fib_rules.o +endif + +ifeq ($(CONFIG_IP_ROUTE_NAT),y) +IPV4_OBJS += ip_nat_dumb.o +endif + +ifeq ($(CONFIG_IP_MROUTE),y) +IPV4_OBJS += ipmr.o +endif + +ifeq ($(CONFIG_INET_RARP),y) +IPV4_OBJS += rarp.o +else + ifeq ($(CONFIG_INET_RARP),m) + M_OBJS += rarp.o + endif +endif + +ifeq ($(CONFIG_NET_IPIP),y) +IPV4X_OBJS += ipip.o +else + ifeq ($(CONFIG_NET_IPIP),m) + MX_OBJS += ipip.o + endif +endif + +ifeq ($(CONFIG_NET_IPGRE),y) +IPV4X_OBJS += ip_gre.o +else + ifeq ($(CONFIG_NET_IPGRE),m) + MX_OBJS += ip_gre.o + endif +endif + +ifeq ($(CONFIG_IP_MASQUERADE),y) +IPV4X_OBJS += ip_masq.o ip_masq_app.o + +ifeq ($(CONFIG_IP_MASQUERADE_MOD),y) + IPV4X_OBJS += ip_masq_mod.o + + ifeq ($(CONFIG_IP_MASQUERADE_IPAUTOFW),y) + IPV4_OBJS += ip_masq_autofw.o + else + ifeq ($(CONFIG_IP_MASQUERADE_IPAUTOFW),m) + M_OBJS += ip_masq_autofw.o + endif + endif + + ifeq ($(CONFIG_IP_MASQUERADE_IPPORTFW),y) + IPV4_OBJS += ip_masq_portfw.o + else + ifeq ($(CONFIG_IP_MASQUERADE_IPPORTFW),m) + M_OBJS += ip_masq_portfw.o + endif + endif + + ifeq ($(CONFIG_IP_MASQUERADE_MFW),y) + IPV4_OBJS += ip_masq_mfw.o + else + ifeq ($(CONFIG_IP_MASQUERADE_MFW),m) + M_OBJS += ip_masq_mfw.o + endif + endif + +endif + +M_OBJS += ip_masq_user.o +M_OBJS += ip_masq_ftp.o ip_masq_irc.o ip_masq_raudio.o ip_masq_quake.o +M_OBJS += ip_masq_vdolive.o ip_masq_cuseeme.o +endif + +ifeq ($(CONFIG_SYN_COOKIES),y) +IPV4_OBJS += syncookies.o +# module not supported, because it would be too messy. +endif + +ifeq ($(CONFIG_IP_PNP),y) +IPV4_OBJS += ipconfig.o +endif + +ifdef CONFIG_INET +O_OBJS := $(IPV4_OBJS) +OX_OBJS := $(IPV4X_OBJS) +endif + +include $(TOPDIR)/Rules.make + +tar: + tar -cvf /dev/f1 . diff --git a/pfinet/linux-src/net/ipv4/af_inet.c b/pfinet/linux-src/net/ipv4/af_inet.c new file mode 100644 index 00000000..e37eb6bd --- /dev/null +++ b/pfinet/linux-src/net/ipv4/af_inet.c @@ -0,0 +1,1161 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * PF_INET protocol family socket handler. + * + * Version: $Id: af_inet.c,v 1.87.2.5 1999/08/08 08:43:10 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Florian La Roche, <flla@stud.uni-sb.de> + * Alan Cox, <A.Cox@swansea.ac.uk> + * + * Changes (see also sock.c) + * + * A.N.Kuznetsov : Socket death error in accept(). + * John Richardson : Fix non blocking error in connect() + * so sockets that fail to connect + * don't return -EINPROGRESS. + * Alan Cox : Asynchronous I/O support + * Alan Cox : Keep correct socket pointer on sock structures + * when accept() ed + * Alan Cox : Semantics of SO_LINGER aren't state moved + * to close when you look carefully. With + * this fixed and the accept bug fixed + * some RPC stuff seems happier. + * Niibe Yutaka : 4.4BSD style write async I/O + * Alan Cox, + * Tony Gale : Fixed reuse semantics. 
+ * Alan Cox : bind() shouldn't abort existing but dead + * sockets. Stops FTP netin:.. I hope. + * Alan Cox : bind() works correctly for RAW sockets. Note + * that FreeBSD at least was broken in this respect + * so be careful with compatibility tests... + * Alan Cox : routing cache support + * Alan Cox : memzero the socket structure for compactness. + * Matt Day : nonblock connect error handler + * Alan Cox : Allow large numbers of pending sockets + * (eg for big web sites), but only if + * specifically application requested. + * Alan Cox : New buffering throughout IP. Used dumbly. + * Alan Cox : New buffering now used smartly. + * Alan Cox : BSD rather than common sense interpretation of + * listen. + * Germano Caronni : Assorted small races. + * Alan Cox : sendmsg/recvmsg basic support. + * Alan Cox : Only sendmsg/recvmsg now supported. + * Alan Cox : Locked down bind (see security list). + * Alan Cox : Loosened bind a little. + * Mike McLagan : ADD/DEL DLCI Ioctls + * Willy Konynenberg : Transparent proxying support. + * David S. Miller : New socket lookup architecture. + * Some other random speedups. + * Cyrus Durgin : Cleaned up file for kmod hacks. + * Andi Kleen : Fix inet_stream_connect TCP race. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/poll.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/arp.h> +#include <net/rarp.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/raw.h> +#include <net/icmp.h> +#include <net/ipip.h> +#include <net/inet_common.h> +#include <linux/ip_fw.h> +#ifdef CONFIG_IP_MROUTE +#include <linux/mroute.h> +#endif +#ifdef CONFIG_IP_MASQUERADE +#include <net/ip_masq.h> +#endif +#ifdef CONFIG_BRIDGE +#include <net/br.h> +#endif +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif +#ifdef CONFIG_NET_RADIO +#include <linux/wireless.h> +#endif /* CONFIG_NET_RADIO */ + +#define min(a,b) ((a)<(b)?(a):(b)) + +struct linux_mib net_statistics; + +extern int raw_get_info(char *, char **, off_t, int, int); +extern int snmp_get_info(char *, char **, off_t, int, int); +extern int netstat_get_info(char *, char **, off_t, int, int); +extern int afinet_get_info(char *, char **, off_t, int, int); +extern int tcp_get_info(char *, char **, off_t, int, int); +extern int udp_get_info(char *, char **, off_t, int, int); +extern void ip_mc_drop_socket(struct sock *sk); + +#ifdef CONFIG_DLCI +extern int dlci_ioctl(unsigned int, void*); +#endif + +#ifdef CONFIG_DLCI_MODULE +int (*dlci_ioctl_hook)(unsigned int, void *) = NULL; +#endif + +int (*rarp_ioctl_hook)(unsigned int,void*) = NULL; + +/* + * Destroy an AF_INET socket + */ + +static __inline__ void kill_sk_queues(struct sock 
*sk) +{ + struct sk_buff *skb; + + /* First the read buffer. */ + while((skb = skb_dequeue(&sk->receive_queue)) != NULL) + kfree_skb(skb); + + /* Next, the error queue. */ + while((skb = skb_dequeue(&sk->error_queue)) != NULL) + kfree_skb(skb); + + /* Now the backlog. */ + while((skb=skb_dequeue(&sk->back_log)) != NULL) + kfree_skb(skb); +} + +static __inline__ void kill_sk_now(struct sock *sk) +{ + /* No longer exists. */ + del_from_prot_sklist(sk); + + /* Remove from protocol hash chains. */ + sk->prot->unhash(sk); + + if(sk->opt) + kfree(sk->opt); + dst_release(sk->dst_cache); + sk_free(sk); +} + +static __inline__ void kill_sk_later(struct sock *sk) +{ + /* this should never happen. */ + /* actually it can if an ack has just been sent. */ + /* + * It's more normal than that... + * It can happen because a skb is still in the device queues + * [PR] + */ + + NETDEBUG(printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n", + atomic_read(&sk->rmem_alloc), + atomic_read(&sk->wmem_alloc))); + + sk->ack_backlog = 0; + release_sock(sk); + net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); +} + +void destroy_sock(struct sock *sk) +{ + lock_sock(sk); /* just to be safe. */ + + /* Now we can no longer get new packets or once the + * timers are killed, send them. + */ + net_delete_timer(sk); + + if (sk->prot->destroy && !sk->destroy) + sk->prot->destroy(sk); + + sk->destroy = 1; + + kill_sk_queues(sk); + + /* Now if everything is gone we can free the socket + * structure, otherwise we need to keep it around until + * everything is gone. + */ + if (atomic_read(&sk->rmem_alloc) == 0 && atomic_read(&sk->wmem_alloc) == 0) + kill_sk_now(sk); + else + kill_sk_later(sk); +} + +/* + * The routines beyond this point handle the behaviour of an AF_INET + * socket object. Mostly it punts to the subprotocols of IP to do + * the work. + */ + + +/* + * Set socket options on an inet socket. + */ + +int inet_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + struct sock *sk=sock->sk; + if (sk->prot->setsockopt==NULL) + return(-EOPNOTSUPP); + return sk->prot->setsockopt(sk,level,optname,optval,optlen); +} + +/* + * Get a socket option on an AF_INET socket. + * + * FIX: POSIX 1003.1g is very ambiguous here. It states that + * asynchronous errors should be reported by getsockopt. We assume + * this means if you specify SO_ERROR (otherwise whats the point of it). + */ + +int inet_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + struct sock *sk=sock->sk; + if (sk->prot->getsockopt==NULL) + return(-EOPNOTSUPP); + return sk->prot->getsockopt(sk,level,optname,optval,optlen); +} + +/* + * Automatically bind an unbound socket. + */ + +static int inet_autobind(struct sock *sk) +{ + /* We may need to bind the socket. */ + if (sk->num == 0) { + if (sk->prot->get_port(sk, 0) != 0) + return(-EAGAIN); + sk->sport = htons(sk->num); + sk->prot->hash(sk); + add_to_prot_sklist(sk); + } + return 0; +} + +/* + * Move a socket into listening state. + */ + +int inet_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + unsigned char old_state; + + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) + return(-EINVAL); + + if ((unsigned) backlog == 0) /* BSDism */ + backlog = 1; + if ((unsigned) backlog > SOMAXCONN) + backlog = SOMAXCONN; + sk->max_ack_backlog = backlog; + + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. 
+ */ + old_state = sk->state; + if (old_state != TCP_LISTEN) { + sk->state = TCP_LISTEN; + sk->ack_backlog = 0; + if (sk->num == 0) { + if (sk->prot->get_port(sk, 0) != 0) { + sk->state = old_state; + return -EAGAIN; + } + sk->sport = htons(sk->num); + add_to_prot_sklist(sk); + } else { + if (sk->prev) + ((struct tcp_bind_bucket*)sk->prev)->fastreuse = 0; + } + + dst_release(xchg(&sk->dst_cache, NULL)); + sk->prot->hash(sk); + sk->socket->flags |= SO_ACCEPTCON; + } + return 0; +} + +/* + * Create an inet socket. + * + * FIXME: Gcc would generate much better code if we set the parameters + * up in in-memory structure order. Gcc68K even more so + */ + +static int inet_create(struct socket *sock, int protocol) +{ + struct sock *sk; + struct proto *prot; + + /* Compatibility */ + if (sock->type == SOCK_PACKET) { + static int warned; + if (net_families[PF_PACKET]==NULL) + { +#if defined(CONFIG_KMOD) && defined(CONFIG_PACKET_MODULE) + char module_name[30]; + sprintf(module_name,"net-pf-%d", PF_PACKET); + request_module(module_name); + if (net_families[PF_PACKET] == NULL) +#endif + return -ESOCKTNOSUPPORT; + } + if (!warned++) + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); + return net_families[PF_PACKET]->create(sock, protocol); + } + + sock->state = SS_UNCONNECTED; + sk = sk_alloc(PF_INET, GFP_KERNEL, 1); + if (sk == NULL) + goto do_oom; + + switch (sock->type) { + case SOCK_STREAM: + if (protocol && protocol != IPPROTO_TCP) + goto free_and_noproto; + protocol = IPPROTO_TCP; + if (ipv4_config.no_pmtu_disc) + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + else + sk->ip_pmtudisc = IP_PMTUDISC_WANT; + prot = &tcp_prot; + sock->ops = &inet_stream_ops; + break; + case SOCK_SEQPACKET: + goto free_and_badtype; + case SOCK_DGRAM: + if (protocol && protocol != IPPROTO_UDP) + goto free_and_noproto; + protocol = IPPROTO_UDP; + sk->no_check = UDP_NO_CHECK; + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + prot=&udp_prot; + sock->ops = &inet_dgram_ops; + break; + case SOCK_RAW: + if (!capable(CAP_NET_RAW)) + goto free_and_badperm; + if (!protocol) + goto free_and_noproto; + prot = &raw_prot; + sk->reuse = 1; + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + sk->num = protocol; + sock->ops = &inet_dgram_ops; + if (protocol == IPPROTO_RAW) + sk->ip_hdrincl = 1; + break; + default: + goto free_and_badtype; + } + + sock_init_data(sock,sk); + + sk->destruct = NULL; + + sk->zapped=0; +#ifdef CONFIG_TCP_NAGLE_OFF + sk->nonagle = 1; +#endif + sk->family = PF_INET; + sk->protocol = protocol; + + sk->prot = prot; + sk->backlog_rcv = prot->backlog_rcv; + + sk->timer.data = (unsigned long)sk; + sk->timer.function = &net_timer; + + sk->ip_ttl=ip_statistics.IpDefaultTTL; + + sk->ip_mc_loop=1; + sk->ip_mc_ttl=1; + sk->ip_mc_index=0; + sk->ip_mc_list=NULL; + + if (sk->num) { + /* It assumes that any protocol which allows + * the user to assign a number at socket + * creation time automatically + * shares. + */ + sk->sport = htons(sk->num); + + /* Add to protocol hash chains. */ + sk->prot->hash(sk); + add_to_prot_sklist(sk); + } + + if (sk->prot->init) { + int err = sk->prot->init(sk); + if (err != 0) { + destroy_sock(sk); + return(err); + } + } + return(0); + +free_and_badtype: + sk_free(sk); + return -ESOCKTNOSUPPORT; + +free_and_badperm: + sk_free(sk); + return -EPERM; + +free_and_noproto: + sk_free(sk); + return -EPROTONOSUPPORT; + +do_oom: + return -ENOBUFS; +} + + +/* + * The peer socket should always be NULL (or else). 
When we call this + * function we are destroying the object and from then on nobody + * should refer to it. + */ + +int inet_release(struct socket *sock, struct socket *peersock) +{ + struct sock *sk = sock->sk; + + if (sk) { + long timeout; + + /* Begin closedown and wake up sleepers. */ + if (sock->state != SS_UNCONNECTED) + sock->state = SS_DISCONNECTING; + sk->state_change(sk); + + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); + + /* If linger is set, we don't return until the close + * is complete. Otherwise we return immediately. The + * actually closing is done the same either way. + * + * If the close is due to the process exiting, we never + * linger.. + */ + timeout = 0; + if (sk->linger && !(current->flags & PF_EXITING)) { + timeout = HZ * sk->lingertime; + if (!timeout) + timeout = MAX_SCHEDULE_TIMEOUT; + } + sock->sk = NULL; + sk->socket = NULL; + sk->prot->close(sk, timeout); + } + return(0); +} + +static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; + struct sock *sk=sock->sk; + unsigned short snum; + int chk_addr_ret; + + /* If the socket has its own bind function then use it. (RAW) */ + if(sk->prot->bind) + return sk->prot->bind(sk, uaddr, addr_len); + + /* Check these errors (active socket, bad address length, double bind). */ + if ((sk->state != TCP_CLOSE) || + (addr_len < sizeof(struct sockaddr_in)) || + (sk->num != 0)) + return -EINVAL; + + chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) { +#ifdef CONFIG_IP_TRANSPARENT_PROXY + /* Superuser may bind to any address to allow transparent proxying. */ + if(chk_addr_ret != RTN_UNICAST || !capable(CAP_NET_ADMIN)) +#endif + return -EADDRNOTAVAIL; /* Source address MUST be ours! */ + } + + /* We keep a pair of addresses. rcv_saddr is the one + * used by hash lookups, and saddr is used for transmit. + * + * In the BSD API these are the same except where it + * would be illegal to use them (multicast/broadcast) in + * which case the sending device address is used. + */ + sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + sk->saddr = 0; /* Use device */ + + snum = ntohs(addr->sin_port); +#ifdef CONFIG_IP_MASQUERADE + /* The kernel masquerader needs some ports. */ + if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END)) + return -EADDRINUSE; +#endif + if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + return(-EACCES); + + /* Make sure we are allowed to bind here. 
*/ + if (sk->prot->get_port(sk, snum) != 0) + return -EADDRINUSE; + + sk->sport = htons(sk->num); + sk->daddr = 0; + sk->dport = 0; + sk->prot->hash(sk); + add_to_prot_sklist(sk); + dst_release(sk->dst_cache); + sk->dst_cache=NULL; + return(0); +} + +int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) +{ + struct sock *sk=sock->sk; + int err; + + if (inet_autobind(sk) != 0) + return(-EAGAIN); + if (sk->prot->connect == NULL) + return(-EOPNOTSUPP); + err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + if (err < 0) + return(err); + return(0); +} + +static void inet_wait_for_connect(struct sock *sk) +{ + struct wait_queue wait = { current, NULL }; + + add_wait_queue(sk->sleep, &wait); + current->state = TASK_INTERRUPTIBLE; + while (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) { + if (signal_pending(current)) + break; + if (sk->err) + break; + schedule(); + current->state = TASK_INTERRUPTIBLE; + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); +} + +/* + * Connect to a remote host. There is regrettably still a little + * TCP 'magic' in here. + */ + +int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) +{ + struct sock *sk=sock->sk; + int err; + + if(sock->state != SS_UNCONNECTED && sock->state != SS_CONNECTING) { + if(sock->state == SS_CONNECTED) + return -EISCONN; + return -EINVAL; + } + + if(sock->state == SS_CONNECTING) { + /* Note: tcp_connected contains SYN_RECV, which may cause + bogus results here. -AK */ + if(tcp_connected(sk->state)) { + sock->state = SS_CONNECTED; + return 0; + } + if (sk->zapped || sk->err) + goto sock_error; + if (flags & O_NONBLOCK) + return -EALREADY; + } else { + if (sk->prot->connect == NULL) + return(-EOPNOTSUPP); + + /* We may need to bind the socket. */ + if (inet_autobind(sk) != 0) + return(-EAGAIN); + + err = sk->prot->connect(sk, uaddr, addr_len); + /* Note: there is a theoretical race here when an wake up + occurred before inet_wait_for_connect is entered. In 2.3 + the wait queue setup should be moved before the low level + connect call. -AK*/ + if (err < 0) + return(err); + sock->state = SS_CONNECTING; + } + + if (sk->state > TCP_FIN_WAIT2 && sock->state == SS_CONNECTING) + goto sock_error; + + if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) + return (-EINPROGRESS); + + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) { + inet_wait_for_connect(sk); + if (signal_pending(current)) + return -ERESTARTSYS; + } + + sock->state = SS_CONNECTED; + if ((sk->state != TCP_ESTABLISHED) && sk->err) + goto sock_error; + return(0); + +sock_error: + /* This is ugly but needed to fix a race in the ICMP error handler */ + if (sk->zapped && sk->state != TCP_CLOSE) { + lock_sock(sk); + tcp_set_state(sk, TCP_CLOSE); + release_sock(sk); + sk->zapped = 0; + } + sock->state = SS_UNCONNECTED; + return sock_error(sk); +} + +/* + * Accept a pending connection. The TCP layer now gives BSD semantics. + */ + +int inet_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *sk1 = sock->sk, *sk2; + struct sock *newsk = newsock->sk; + int err = -EINVAL; + + if (sock->state != SS_UNCONNECTED || !(sock->flags & SO_ACCEPTCON)) + goto do_err; + + err = -EOPNOTSUPP; + if (sk1->prot->accept == NULL) + goto do_err; + + if((sk2 = sk1->prot->accept(sk1,flags)) == NULL) + goto do_sk1_err; + + /* + * We've been passed an extra socket. 
+ * We need to free it up because the tcp module creates + * its own when it accepts one. + */ + sk2->sleep = newsk->sleep; + + newsock->sk = sk2; + sk2->socket = newsock; + newsk->socket = NULL; + + if (flags & O_NONBLOCK) + goto do_half_success; + + if(sk2->state == TCP_ESTABLISHED) + goto do_full_success; + if(sk2->err > 0) + goto do_connect_err; + err = -ECONNABORTED; + if (sk2->state == TCP_CLOSE) + goto do_bad_connection; +do_full_success: + destroy_sock(newsk); + newsock->state = SS_CONNECTED; + return 0; + +do_half_success: + destroy_sock(newsk); + return(0); + +do_connect_err: + err = sock_error(sk2); +do_bad_connection: + sk2->sleep = NULL; + sk2->socket = NULL; + destroy_sock(sk2); + newsock->sk = newsk; + newsk->socket = newsock; + return err; + +do_sk1_err: + err = sock_error(sk1); +do_err: + return err; +} + + +/* + * This does both peername and sockname. + */ + +static int inet_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + + sin->sin_family = AF_INET; + if (peer) { + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sin->sin_port = sk->dport; + sin->sin_addr.s_addr = sk->daddr; + } else { + __u32 addr = sk->rcv_saddr; + if (!addr) + addr = sk->saddr; + sin->sin_port = sk->sport; + sin->sin_addr.s_addr = addr; + } + *uaddr_len = sizeof(*sin); + return(0); +} + + + +int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, + int flags, struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + int addr_len = 0; + int err; + + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); + if (sk->prot->recvmsg == NULL) + return(-EOPNOTSUPP); + /* We may need to bind the socket. */ + if (inet_autobind(sk) != 0) + return(-EAGAIN); + err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT, + flags&~MSG_DONTWAIT, &addr_len); + if (err >= 0) + msg->msg_namelen = addr_len; + return err; +} + + +int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, + struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + + if (sk->shutdown & SEND_SHUTDOWN) { + if (!(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 1); + return(-EPIPE); + } + if (sk->prot->sendmsg == NULL) + return(-EOPNOTSUPP); + if(sk->err) + return sock_error(sk); + + /* We may need to bind the socket. */ + if (inet_autobind(sk) != 0) + return -EAGAIN; + + return sk->prot->sendmsg(sk, msg, size); +} + + +int inet_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + + /* This should really check to make sure + * the socket is a TCP socket. (WHY AC...) + */ + how++; /* maps 0->1 has the advantage of making bit 1 rcvs and + 1->2 bit 2 snds. + 2->3 */ + if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */ + return(-EINVAL); + if (!sk) + return(-ENOTCONN); + if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED) + sock->state = SS_CONNECTED; + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sk->shutdown |= how; + if (sk->prot->shutdown) + sk->prot->shutdown(sk, how); + /* Wake up anyone sleeping in poll. */ + sk->state_change(sk); + return(0); +} + + +unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + + if (sk->prot->poll == NULL) + return(0); + return sk->prot->poll(file, sock, wait); +} + +/* + * ioctl() calls you can issue on an INET socket. Most of these are + * device configuration and stuff and very rarely used. Some ioctls + * pass on to the socket itself. 
+ * + * NOTE: I like the idea of a module for the config stuff. ie ifconfig + * loads the devconfigure module does its configuring and unloads it. + * There's a good 20K of config code hanging around the kernel. + */ + +static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err; + int pid; + + switch(cmd) + { + case FIOSETOWN: + case SIOCSPGRP: + err = get_user(pid, (int *) arg); + if (err) + return err; + if (current->pid != pid && current->pgrp != -pid && + !capable(CAP_NET_ADMIN)) + return -EPERM; + sk->proc = pid; + return(0); + case FIOGETOWN: + case SIOCGPGRP: + return put_user(sk->proc, (int *)arg); + case SIOCGSTAMP: + if(sk->stamp.tv_sec==0) + return -ENOENT; + err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval)); + if (err) + err = -EFAULT; + return err; + case SIOCADDRT: + case SIOCDELRT: + case SIOCRTMSG: + return(ip_rt_ioctl(cmd,(void *) arg)); + case SIOCDARP: + case SIOCGARP: + case SIOCSARP: + return(arp_ioctl(cmd,(void *) arg)); + case SIOCDRARP: + case SIOCGRARP: + case SIOCSRARP: +#ifdef CONFIG_KMOD + if (rarp_ioctl_hook == NULL) + request_module("rarp"); +#endif + if (rarp_ioctl_hook != NULL) + return(rarp_ioctl_hook(cmd,(void *) arg)); + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: + case SIOCSIFFLAGS: + return(devinet_ioctl(cmd,(void *) arg)); + case SIOCGIFBR: + case SIOCSIFBR: +#ifdef CONFIG_BRIDGE + return(br_ioctl(cmd,(void *) arg)); +#else + return -ENOPKG; +#endif + + case SIOCADDDLCI: + case SIOCDELDLCI: +#ifdef CONFIG_DLCI + return(dlci_ioctl(cmd, (void *) arg)); +#endif + +#ifdef CONFIG_DLCI_MODULE + +#ifdef CONFIG_KMOD + if (dlci_ioctl_hook == NULL) + request_module("dlci"); +#endif + + if (dlci_ioctl_hook) + return((*dlci_ioctl_hook)(cmd, (void *) arg)); +#endif + return -ENOPKG; + + default: + if ((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) + return(dev_ioctl(cmd,(void *) arg)); + +#ifdef CONFIG_NET_RADIO + if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST)) + return(dev_ioctl(cmd,(void *) arg)); +#endif + + if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD) + return(dev_ioctl(cmd,(void *) arg)); + return err; + } + /*NOTREACHED*/ + return(0); +} + +struct proto_ops inet_stream_ops = { + PF_INET, + + sock_no_dup, + inet_release, + inet_bind, + inet_stream_connect, + sock_no_socketpair, + inet_accept, + inet_getname, + inet_poll, + inet_ioctl, + inet_listen, + inet_shutdown, + inet_setsockopt, + inet_getsockopt, + sock_no_fcntl, + inet_sendmsg, + inet_recvmsg +}; + +struct proto_ops inet_dgram_ops = { + PF_INET, + + sock_no_dup, + inet_release, + inet_bind, + inet_dgram_connect, + sock_no_socketpair, + sock_no_accept, + inet_getname, + datagram_poll, + inet_ioctl, + sock_no_listen, + inet_shutdown, + inet_setsockopt, + inet_getsockopt, + sock_no_fcntl, + inet_sendmsg, + inet_recvmsg +}; + +struct net_proto_family inet_family_ops = { + PF_INET, + inet_create +}; + + +#ifdef CONFIG_PROC_FS +#ifdef CONFIG_INET_RARP +static struct proc_dir_entry proc_net_rarp = { + PROC_NET_RARP, 4, "rarp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rarp_get_info +}; +#endif /* RARP */ +static struct proc_dir_entry proc_net_raw = { + PROC_NET_RAW, 3, "raw", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + raw_get_info +}; +static struct proc_dir_entry 
proc_net_netstat = { + PROC_NET_NETSTAT, 7, "netstat", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + netstat_get_info +}; +static struct proc_dir_entry proc_net_snmp = { + PROC_NET_SNMP, 4, "snmp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + snmp_get_info +}; +static struct proc_dir_entry proc_net_sockstat = { + PROC_NET_SOCKSTAT, 8, "sockstat", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + afinet_get_info +}; +static struct proc_dir_entry proc_net_tcp = { + PROC_NET_TCP, 3, "tcp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + tcp_get_info +}; +static struct proc_dir_entry proc_net_udp = { + PROC_NET_UDP, 3, "udp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + udp_get_info +}; +#endif /* CONFIG_PROC_FS */ + +extern void tcp_init(void); +extern void tcp_v4_init(struct net_proto_family *); + + +/* + * Called by socket.c on kernel startup. + */ + +__initfunc(void inet_proto_init(struct net_proto *pro)) +{ + struct sk_buff *dummy_skb; + struct inet_protocol *p; + + printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n"); + + if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) + { + printk(KERN_CRIT "inet_proto_init: panic\n"); + return; + } + + /* + * Tell SOCKET that we are alive... + */ + + (void) sock_register(&inet_family_ops); + + /* + * Add all the protocols. + */ + + printk(KERN_INFO "IP Protocols: "); + for(p = inet_protocol_base; p != NULL;) + { + struct inet_protocol *tmp = (struct inet_protocol *) p->next; + inet_add_protocol(p); + printk("%s%s",p->name,tmp?", ":"\n"); + p = tmp; + } + + /* + * Set the ARP module up + */ + + arp_init(); + + /* + * Set the IP module up + */ + + ip_init(); + + tcp_v4_init(&inet_family_ops); + + /* Setup TCP slab cache for open requests. */ + tcp_init(); + + + /* + * Set the ICMP layer up + */ + + icmp_init(&inet_family_ops); + + /* I wish inet_add_protocol had no constructor hook... + I had to move IPIP from net/ipv4/protocol.c :-( --ANK + */ +#ifdef CONFIG_NET_IPIP + ipip_init(); +#endif +#ifdef CONFIG_NET_IPGRE + ipgre_init(); +#endif + + /* + * Set the firewalling up + */ +#if defined(CONFIG_IP_FIREWALL) + ip_fw_init(); +#endif + +#ifdef CONFIG_IP_MASQUERADE + ip_masq_init(); +#endif + + /* + * Initialise the multicast router + */ +#if defined(CONFIG_IP_MROUTE) + ip_mr_init(); +#endif + +#ifdef CONFIG_INET_RARP + rarp_ioctl_hook = rarp_ioctl; +#endif + /* + * Create all the /proc entries. + */ + +#ifdef CONFIG_PROC_FS +#ifdef CONFIG_INET_RARP + proc_net_register(&proc_net_rarp); +#endif /* RARP */ + proc_net_register(&proc_net_raw); + proc_net_register(&proc_net_snmp); + proc_net_register(&proc_net_netstat); + proc_net_register(&proc_net_sockstat); + proc_net_register(&proc_net_tcp); + proc_net_register(&proc_net_udp); +#endif /* CONFIG_PROC_FS */ +} diff --git a/pfinet/linux-src/net/ipv4/arp.c b/pfinet/linux-src/net/ipv4/arp.c new file mode 100644 index 00000000..27d2f802 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/arp.c @@ -0,0 +1,1154 @@ +/* linux/net/inet/arp.c + * + * Version: $Id: arp.c,v 1.77.2.1 1999/06/28 10:39:23 davem Exp $ + * + * Copyright (C) 1994 by Florian La Roche + * + * This module implements the Address Resolution Protocol ARP (RFC 826), + * which is used to convert IP addresses (or in the future maybe other + * high-level addresses) into a low-level hardware address (like an Ethernet + * address). 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Alan Cox : Removed the Ethernet assumptions in + * Florian's code + * Alan Cox : Fixed some small errors in the ARP + * logic + * Alan Cox : Allow >4K in /proc + * Alan Cox : Make ARP add its own protocol entry + * Ross Martin : Rewrote arp_rcv() and arp_get_info() + * Stephen Henson : Add AX25 support to arp_get_info() + * Alan Cox : Drop data when a device is downed. + * Alan Cox : Use init_timer(). + * Alan Cox : Double lock fixes. + * Martin Seine : Move the arphdr structure + * to if_arp.h for compatibility. + * with BSD based programs. + * Andrew Tridgell : Added ARP netmask code and + * re-arranged proxy handling. + * Alan Cox : Changed to use notifiers. + * Niibe Yutaka : Reply for this device or proxies only. + * Alan Cox : Don't proxy across hardware types! + * Jonathan Naylor : Added support for NET/ROM. + * Mike Shaver : RFC1122 checks. + * Jonathan Naylor : Only lookup the hardware address for + * the correct hardware type. + * Germano Caronni : Assorted subtle races. + * Craig Schlenter : Don't modify permanent entry + * during arp_rcv. + * Russ Nelson : Tidied up a few bits. + * Alexey Kuznetsov: Major changes to caching and behaviour, + * eg intelligent arp probing and + * generation + * of host down events. + * Alan Cox : Missing unlock in device events. + * Eckes : ARP ioctl control errors. + * Alexey Kuznetsov: Arp free fix. + * Manuel Rodriguez: Gratuitous ARP. + * Jonathan Layes : Added arpd support through kerneld + * message queue (960314) + * Mike Shaver : /proc/sys/net/ipv4/arp_* support + * Mike McLagan : Routing by source + * Stuart Cheshire : Metricom and grat arp fixes + * *** FOR 2.1 clean this up *** + * Lawrence V. Stefani: (08/12/96) Added FDDI support. + * Alan Cox : Took the AP1000 nasty FDDI hack and + * folded into the mainstream FDDI code. + * Ack spit, Linus how did you allow that + * one in... + * Jes Sorensen : Make FDDI work again in 2.1.x and + * clean up the APFDDI & gen. FDDI bits. + * Alexey Kuznetsov: new arp state machine; + * now it is in net/core/neighbour.c. + */ + +/* RFC1122 Status: + 2.3.2.1 (ARP Cache Validation): + MUST provide mechanism to flush stale cache entries (OK) + SHOULD be able to configure cache timeout (OK) + MUST throttle ARP retransmits (OK) + 2.3.2.2 (ARP Packet Queue): + SHOULD save at least one packet from each "conversation" with an + unresolved IP address. 
(OK) + 950727 -- MS +*/ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/mm.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/fddidevice.h> +#include <linux/if_arp.h> +#include <linux/trdevice.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#include <net/ip.h> +#include <net/icmp.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/arp.h> +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#include <net/ax25.h> +#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#include <net/netrom.h> +#endif +#endif + +#include <asm/system.h> +#include <asm/uaccess.h> + +/* + * Interface to generic neighbour cache. + */ +static int arp_constructor(struct neighbour *neigh); +static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); +static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); +static void parp_redo(struct sk_buff *skb); + +static struct neigh_ops arp_generic_ops = +{ + AF_INET, + NULL, + arp_solicit, + arp_error_report, + neigh_resolve_output, + neigh_connected_output, + dev_queue_xmit, + dev_queue_xmit +}; + +static struct neigh_ops arp_hh_ops = +{ + AF_INET, + NULL, + arp_solicit, + arp_error_report, + neigh_resolve_output, + neigh_resolve_output, + dev_queue_xmit, + dev_queue_xmit +}; + +static struct neigh_ops arp_direct_ops = +{ + AF_INET, + NULL, + NULL, + NULL, + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit +}; + +struct neigh_ops arp_broken_ops = +{ + AF_INET, + NULL, + arp_solicit, + arp_error_report, + neigh_compat_output, + neigh_compat_output, + dev_queue_xmit, + dev_queue_xmit, +}; + +struct neigh_table arp_tbl = +{ + NULL, + AF_INET, + sizeof(struct neighbour) + 4, + 4, + arp_constructor, + NULL, + NULL, + parp_redo, + { NULL, NULL, &arp_tbl, 0, NULL, NULL, + 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 1*HZ }, + 30*HZ, 128, 512, 1024, +}; + +int arp_mc_map(u32 addr, u8 *haddr, struct device *dev, int dir) +{ + switch (dev->type) { + case ARPHRD_ETHER: + case ARPHRD_IEEE802: + case ARPHRD_FDDI: + ip_eth_mc_map(addr, haddr); + return 0; + default: + if (dir) { + memcpy(haddr, dev->broadcast, dev->addr_len); + return 0; + } + } + return -EINVAL; +} + + + +static int arp_constructor(struct neighbour *neigh) +{ + u32 addr = *(u32*)neigh->primary_key; + struct device *dev = neigh->dev; + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) + return -EINVAL; + + neigh->type = inet_addr_type(addr); + if (in_dev->arp_parms) + neigh->parms = in_dev->arp_parms; + + if (dev->hard_header == NULL) { + neigh->nud_state = NUD_NOARP; + neigh->ops = &arp_direct_ops; + neigh->output = neigh->ops->queue_xmit; + } else { + /* Good devices (checked by reading texts, but only Ethernet is + tested) + + ARPHRD_ETHER: (ethernet, apfddi) + ARPHRD_FDDI: (fddi) + ARPHRD_IEEE802: (tr) + ARPHRD_METRICOM: (strip) + ARPHRD_ARCNET: + etc. etc. etc. + + ARPHRD_IPDDP will also work, if author repairs it. + I did not it, because this driver does not work even + in old paradigm. + */ + +#if 1 + /* So... these "amateur" devices are hopeless. 
+ The only thing, that I can say now: + It is very sad that we need to keep ugly obsolete + code to make them happy. + + They should be moved to more reasonable state, now + they use rebuild_header INSTEAD OF hard_start_xmit!!! + Besides that, they are sort of out of date + (a lot of redundant clones/copies, useless in 2.1), + I wonder why people believe that they work. + */ + switch (dev->type) { + default: + break; + case ARPHRD_ROSE: +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + case ARPHRD_AX25: +#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + case ARPHRD_NETROM: +#endif + neigh->ops = &arp_broken_ops; + neigh->output = neigh->ops->output; + return 0; +#endif + } +#endif + if (neigh->type == RTN_MULTICAST) { + neigh->nud_state = NUD_NOARP; + arp_mc_map(addr, neigh->ha, dev, 1); + } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->dev_addr, dev->addr_len); + } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->broadcast, dev->addr_len); + } + if (dev->hard_header_cache) + neigh->ops = &arp_hh_ops; + else + neigh->ops = &arp_generic_ops; + if (neigh->nud_state&NUD_VALID) + neigh->output = neigh->ops->connected_output; + else + neigh->output = neigh->ops->output; + } + + return 0; +} + +static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) +{ + dst_link_failure(skb); + kfree_skb(skb); +} + +static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) +{ + u32 saddr; + u8 *dst_ha = NULL; + struct device *dev = neigh->dev; + u32 target = *(u32*)neigh->primary_key; + int probes = neigh->probes; + + if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL) + saddr = skb->nh.iph->saddr; + else + saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); + + if ((probes -= neigh->parms->ucast_probes) < 0) { + if (!(neigh->nud_state&NUD_VALID)) + printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); + dst_ha = neigh->ha; + } else if ((probes -= neigh->parms->app_probes) < 0) { +#ifdef CONFIG_ARPD + neigh_app_ns(neigh); +#endif + return; + } + + arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, + dst_ha, dev->dev_addr, NULL); +} + +/* OBSOLETE FUNCTIONS */ + +/* + * Find an arp mapping in the cache. If not found, post a request. + * + * It is very UGLY routine: it DOES NOT use skb->dst->neighbour, + * even if it exists. It is supposed that skb->dev was mangled + * by a virtual device (eql, shaper). Nobody but broken devices + * is allowed to use this function, it is scheduled to be removed. 
--ANK + */ + +static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct device * dev) +{ + switch (addr_hint) { + case RTN_LOCAL: + printk(KERN_DEBUG "ARP: arp called for own IP address\n"); + memcpy(haddr, dev->dev_addr, dev->addr_len); + return 1; + case RTN_MULTICAST: + arp_mc_map(paddr, haddr, dev, 1); + return 1; + case RTN_BROADCAST: + memcpy(haddr, dev->broadcast, dev->addr_len); + return 1; + } + return 0; +} + + +int arp_find(unsigned char *haddr, struct sk_buff *skb) +{ + struct device *dev = skb->dev; + u32 paddr; + struct neighbour *n; + + if (!skb->dst) { + printk(KERN_DEBUG "arp_find is called with dst==NULL\n"); + kfree_skb(skb); + return 1; + } + + paddr = ((struct rtable*)skb->dst)->rt_gateway; + + if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) + return 0; + + start_bh_atomic(); + n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); + + if (n) { + n->used = jiffies; + if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { + memcpy(haddr, n->ha, dev->addr_len); + neigh_release(n); + end_bh_atomic(); + return 0; + } + neigh_release(n); + } else + kfree_skb(skb); + end_bh_atomic(); + return 1; +} + +/* END OF OBSOLETE FUNCTIONS */ + +/* + * Note: requires bh_atomic locking. + */ +int arp_bind_neighbour(struct dst_entry *dst) +{ + struct device *dev = dst->dev; + + if (dev == NULL) + return 0; + if (dst->neighbour == NULL) { + u32 nexthop = ((struct rtable*)dst)->rt_gateway; + if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) + nexthop = 0; + dst->neighbour = __neigh_lookup(&arp_tbl, &nexthop, dev, 1); + } + return (dst->neighbour != NULL); +} + +/* + * Interface to link layer: send routine and receive handler. + */ + +/* + * Create and send an arp packet. If (dest_hw == NULL), we create a broadcast + * message. + */ + +void arp_send(int type, int ptype, u32 dest_ip, + struct device *dev, u32 src_ip, + unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw) +{ + struct sk_buff *skb; + struct arphdr *arp; + unsigned char *arp_ptr; + + /* + * No arp on this interface. + */ + + if (dev->flags&IFF_NOARP) + return; + + /* + * Allocate a buffer + */ + + skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) + + dev->hard_header_len + 15, GFP_ATOMIC); + if (skb == NULL) + return; + + skb_reserve(skb, (dev->hard_header_len+15)&~15); + skb->nh.raw = skb->data; + arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); + skb->dev = dev; + skb->protocol = __constant_htons (ETH_P_ARP); + if (src_hw == NULL) + src_hw = dev->dev_addr; + if (dest_hw == NULL) + dest_hw = dev->broadcast; + + /* + * Fill the device header for the ARP frame + */ + dev->hard_header(skb,dev,ptype,dest_hw,src_hw,skb->len); + + /* + * Fill out the arp protocol part. + * + * The arp hardware type should match the device type, except for FDDI, + * which (according to RFC 1390) should always equal 1 (Ethernet). + */ + /* + * Exceptions everywhere. AX.25 uses the AX.25 PID value not the + * DIX code for the protocol. Make these device structure fields. 
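+ *
+ *	For reference, the payload assembled below follows the RFC 826
+ *	layout; the byte counts in parentheses are for the common
+ *	Ethernet/IPv4 case and are given for illustration only:
+ *
+ *	    struct arphdr             ar_hrd, ar_pro, ar_hln, ar_pln, ar_op
+ *	    sender hardware address   ar_hln bytes (6)
+ *	    sender IP address         ar_pln bytes (4)
+ *	    target hardware address   ar_hln bytes (6, zero-filled when unknown)
+ *	    target IP address         ar_pln bytes (4)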
+ */ + switch (dev->type) { + default: + arp->ar_hrd = htons(dev->type); + arp->ar_pro = __constant_htons(ETH_P_IP); + break; + +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + case ARPHRD_AX25: + arp->ar_hrd = __constant_htons(ARPHRD_AX25); + arp->ar_pro = __constant_htons(AX25_P_IP); + break; + +#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + case ARPHRD_NETROM: + arp->ar_hrd = __constant_htons(ARPHRD_NETROM); + arp->ar_pro = __constant_htons(AX25_P_IP); + break; +#endif +#endif + +#ifdef CONFIG_FDDI + case ARPHRD_FDDI: + arp->ar_hrd = __constant_htons(ARPHRD_ETHER); + arp->ar_pro = __constant_htons(ETH_P_IP); + break; +#endif + } + + arp->ar_hln = dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr=(unsigned char *)(arp+1); + + memcpy(arp_ptr, src_hw, dev->addr_len); + arp_ptr+=dev->addr_len; + memcpy(arp_ptr, &src_ip,4); + arp_ptr+=4; + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr+=dev->addr_len; + memcpy(arp_ptr, &dest_ip, 4); + skb->dev = dev; + + dev_queue_xmit(skb); +} + +static void parp_redo(struct sk_buff *skb) +{ + arp_rcv(skb, skb->dev, NULL); +} + +/* + * Receive an arp request by the device layer. + */ + +int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct arphdr *arp = skb->nh.arph; + unsigned char *arp_ptr= (unsigned char *)(arp+1); + struct rtable *rt; + unsigned char *sha, *tha; + u32 sip, tip; + u16 dev_type = dev->type; + int addr_type; + struct in_device *in_dev = dev->ip_ptr; + struct neighbour *n; + +/* + * The hardware length of the packet should match the hardware length + * of the device. Similarly, the hardware types should match. The + * device should be ARP-able. Also, if pln is not 4, then the lookup + * is not from an IP number. We can't currently handle this, so toss + * it. + */ + if (in_dev == NULL || + arp->ar_hln != dev->addr_len || + dev->flags & IFF_NOARP || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK || + arp->ar_pln != 4) + goto out; + + switch (dev_type) { + default: + if (arp->ar_pro != __constant_htons(ETH_P_IP)) + goto out; + if (htons(dev_type) != arp->ar_hrd) + goto out; + break; +#ifdef CONFIG_NET_ETHERNET + case ARPHRD_ETHER: + /* + * ETHERNET devices will accept ARP hardware types of either + * 1 (Ethernet) or 6 (IEEE 802.2). + */ + if (arp->ar_hrd != __constant_htons(ARPHRD_ETHER) && + arp->ar_hrd != __constant_htons(ARPHRD_IEEE802)) + goto out; + if (arp->ar_pro != __constant_htons(ETH_P_IP)) + goto out; + break; +#endif +#ifdef CONFIG_FDDI + case ARPHRD_FDDI: + /* + * According to RFC 1390, FDDI devices should accept ARP hardware types + * of 1 (Ethernet). However, to be more robust, we'll accept hardware + * types of either 1 (Ethernet) or 6 (IEEE 802.2). 
+ */ + if (arp->ar_hrd != __constant_htons(ARPHRD_ETHER) && + arp->ar_hrd != __constant_htons(ARPHRD_IEEE802)) + goto out; + if (arp->ar_pro != __constant_htons(ETH_P_IP)) + goto out; + break; +#endif +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + case ARPHRD_AX25: + if (arp->ar_pro != __constant_htons(AX25_P_IP)) + goto out; + if (arp->ar_hrd != __constant_htons(ARPHRD_AX25)) + goto out; + break; +#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) + case ARPHRD_NETROM: + if (arp->ar_pro != __constant_htons(AX25_P_IP)) + goto out; + if (arp->ar_hrd != __constant_htons(ARPHRD_NETROM)) + goto out; + break; +#endif +#endif + } + + /* Undertsand only these message types */ + + if (arp->ar_op != __constant_htons(ARPOP_REPLY) && + arp->ar_op != __constant_htons(ARPOP_REQUEST)) + goto out; + +/* + * Extract fields + */ + sha=arp_ptr; + arp_ptr += dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + tha=arp_ptr; + arp_ptr += dev->addr_len; + memcpy(&tip, arp_ptr, 4); +/* + * Check for bad requests for 127.x.x.x and requests for multicast + * addresses. If this is one such, delete it. + */ + if (LOOPBACK(tip) || MULTICAST(tip)) + goto out; + +/* + * Process entry. The idea here is we want to send a reply if it is a + * request for us or if it is a request for someone else that we hold + * a proxy for. We want to add an entry to our cache if it is a reply + * to us or if it is a request for our address. + * (The assumption for this last is that if someone is requesting our + * address, they are probably intending to talk to us, so it saves time + * if we cache their address. Their address is also probably not in + * our cache, since ours is not in their cache.) + * + * Putting this another way, we only care about replies if they are to + * us, in which case we add them to the cache. For requests, we care + * about those for us and those for our proxies. We reply to both, + * and in the case of requests for us we add the requester to the arp + * cache. + */ + + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { + if (arp->ar_op == __constant_htons(ARPOP_REQUEST) && + inet_addr_type(tip) == RTN_LOCAL) + arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr); + goto out; + } + + if (arp->ar_op == __constant_htons(ARPOP_REQUEST) && + ip_route_input(skb, tip, sip, 0, dev) == 0) { + + rt = (struct rtable*)skb->dst; + addr_type = rt->rt_type; + + if (addr_type == RTN_LOCAL) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) { + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + neigh_release(n); + } + goto out; + } else if (IN_DEV_FORWARD(in_dev)) { + if ((rt->rt_flags&RTCF_DNAT) || + (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && + (IN_DEV_PROXY_ARP(in_dev) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + neigh_release(n); + + if (skb->stamp.tv_sec == 0 || + skb->pkt_type == PACKET_HOST || + in_dev->arp_parms->proxy_delay == 0) { + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + } else { + pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); + return 0; + } + goto out; + } + } + } + + /* Update our ARP tables */ + + n = __neigh_lookup(&arp_tbl, &sip, dev, 0); + +#ifdef CONFIG_IP_ACCEPT_UNSOLICITED_ARP + /* Unsolicited ARP is not accepted by default. 
+ It is possible, that this option should be enabled for some + devices (strip is candidate) + */ + if (n == NULL && + arp->ar_op == __constant_htons(ARPOP_REPLY) && + inet_addr_type(sip) == RTN_UNICAST) + n = __neigh_lookup(&arp_tbl, &sip, dev, -1); +#endif + + if (n) { + int state = NUD_REACHABLE; + int override = 0; + + /* If several different ARP replies follows back-to-back, + use the FIRST one. It is possible, if several proxy + agents are active. Taking the first reply prevents + arp trashing and chooses the fastest router. + */ + if (jiffies - n->updated >= n->parms->locktime) + override = 1; + + /* Broadcast replies and request packets + do not assert neighbour reachability. + */ + if (arp->ar_op != __constant_htons(ARPOP_REPLY) || + skb->pkt_type != PACKET_HOST) + state = NUD_STALE; + neigh_update(n, sha, state, override, 1); + neigh_release(n); + } + +out: + kfree_skb(skb); + return 0; +} + + + +/* + * User level interface (ioctl, /proc) + */ + +/* + * Set (create) an ARP cache entry. + */ + +int arp_req_set(struct arpreq *r, struct device * dev) +{ + u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + struct neighbour *neigh; + int err; + + if (r->arp_flags&ATF_PUBL) { + u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; + if (mask && mask != 0xFFFFFFFF) + return -EINVAL; + if (!dev && (r->arp_flags & ATF_COM)) { + dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + if (!dev) + return -ENODEV; + } + if (mask) { + if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL) + return -ENOBUFS; + return 0; + } + if (dev == NULL) { + ipv4_devconf.proxy_arp = 1; + return 0; + } + if (dev->ip_ptr) { + ((struct in_device*)dev->ip_ptr)->cnf.proxy_arp = 1; + return 0; + } + return -ENXIO; + } + + if (r->arp_flags & ATF_PERM) + r->arp_flags |= ATF_COM; + if (dev == NULL) { + struct rtable * rt; + if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0) + return err; + dev = rt->u.dst.dev; + ip_rt_put(rt); + if (!dev) + return -EINVAL; + } + if (r->arp_ha.sa_family != dev->type) + return -EINVAL; + + err = -ENOBUFS; + start_bh_atomic(); + neigh = __neigh_lookup(&arp_tbl, &ip, dev, 1); + if (neigh) { + unsigned state = NUD_STALE; + if (r->arp_flags & ATF_PERM) + state = NUD_PERMANENT; + err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? + r->arp_ha.sa_data : NULL, state, 1, 0); + neigh_release(neigh); + } + end_bh_atomic(); + return err; +} + +static unsigned arp_state_to_flags(struct neighbour *neigh) +{ + unsigned flags = 0; + if (neigh->nud_state&NUD_PERMANENT) + flags = ATF_PERM|ATF_COM; + else if (neigh->nud_state&NUD_VALID) + flags = ATF_COM; + return flags; +} + +/* + * Get an ARP cache entry. 
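+ *
+ *	(Illustration: from user space this is reached through the SIOCGARP
+ *	ioctl on an AF_INET socket, roughly
+ *
+ *	    struct arpreq r;
+ *	    memset(&r, 0, sizeof(r));
+ *	    ((struct sockaddr_in *)&r.arp_pa)->sin_family = AF_INET;
+ *	    ((struct sockaddr_in *)&r.arp_pa)->sin_addr.s_addr = inet_addr("10.0.0.1");
+ *	    strcpy(r.arp_dev, "eth0");
+ *	    ioctl(fd, SIOCGARP, &r);      -- fills r.arp_ha on success
+ *
+ *	where the address and interface name are examples only.)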
+ */ + +static int arp_req_get(struct arpreq *r, struct device *dev) +{ + u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + struct neighbour *neigh; + int err = -ENXIO; + + start_bh_atomic(); + neigh = __neigh_lookup(&arp_tbl, &ip, dev, 0); + if (neigh) { + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + r->arp_ha.sa_family = dev->type; + strncpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + r->arp_flags = arp_state_to_flags(neigh); + neigh_release(neigh); + err = 0; + } + end_bh_atomic(); + return err; +} + +int arp_req_delete(struct arpreq *r, struct device * dev) +{ + int err; + u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + struct neighbour *neigh; + + if (r->arp_flags & ATF_PUBL) { + u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; + if (mask == 0xFFFFFFFF) + return pneigh_delete(&arp_tbl, &ip, dev); + if (mask == 0) { + if (dev == NULL) { + ipv4_devconf.proxy_arp = 0; + return 0; + } + if (dev->ip_ptr) { + ((struct in_device*)dev->ip_ptr)->cnf.proxy_arp = 0; + return 0; + } + return -ENXIO; + } + return -EINVAL; + } + + if (dev == NULL) { + struct rtable * rt; + if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0) + return err; + dev = rt->u.dst.dev; + ip_rt_put(rt); + if (!dev) + return -EINVAL; + } + err = -ENXIO; + start_bh_atomic(); + neigh = __neigh_lookup(&arp_tbl, &ip, dev, 0); + if (neigh) { + if (neigh->nud_state&~NUD_NOARP) + err = neigh_update(neigh, NULL, NUD_FAILED, 1, 0); + neigh_release(neigh); + } + end_bh_atomic(); + return err; +} + +/* + * Handle an ARP layer I/O control request. + */ + +int arp_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct arpreq r; + struct device * dev = NULL; + + switch(cmd) { + case SIOCDARP: + case SIOCSARP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + case SIOCGARP: + err = copy_from_user(&r, arg, sizeof(struct arpreq)); + if (err) + return -EFAULT; + break; + default: + return -EINVAL; + } + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + + if (!(r.arp_flags & ATF_PUBL) && + (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) + return -EINVAL; + if (!(r.arp_flags & ATF_NETMASK)) + ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr=__constant_htonl(0xFFFFFFFFUL); + + rtnl_lock(); + if (r.arp_dev[0]) { + err = -ENODEV; + if ((dev = dev_get(r.arp_dev)) == NULL) + goto out; + + /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ + if (!r.arp_ha.sa_family) + r.arp_ha.sa_family = dev->type; + err = -EINVAL; + if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type) + goto out; + } else if (cmd == SIOCGARP) { + err = -ENODEV; + goto out; + } + + switch(cmd) { + case SIOCDARP: + err = arp_req_delete(&r, dev); + break; + case SIOCSARP: + err = arp_req_set(&r, dev); + break; + case SIOCGARP: + err = arp_req_get(&r, dev); + if (!err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; + break; + } +out: + rtnl_unlock(); + return err; +} + +/* + * Write the contents of the ARP cache to a PROCfs file. 
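+ *
+ *	For illustration, an entry in the format produced below looks like
+ *	(values made up; HW type 0x1 is ARPHRD_ETHER, flags 0x2 is ATF_COM):
+ *
+ *	    IP address       HW type     Flags       HW address            Mask     Device
+ *	    10.0.0.1         0x1         0x2         00:50:56:01:02:03     *        eth0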
+ */ +#ifdef CONFIG_PROC_FS + +#define HBUFFERLEN 30 + +int arp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t pos=0; + int size; + char hbuffer[HBUFFERLEN]; + int i,j,k; + const char hexbuf[] = "0123456789ABCDEF"; + + size = sprintf(buffer,"IP address HW type Flags HW address Mask Device\n"); + + pos+=size; + len+=size; + + neigh_table_lock(&arp_tbl); + + for(i=0; i<=NEIGH_HASHMASK; i++) { + struct neighbour *n; + for (n=arp_tbl.hash_buckets[i]; n; n=n->next) { + struct device *dev = n->dev; + int hatype = dev->type; + + /* Do not confuse users "arp -a" with magic entries */ + if (!(n->nud_state&~NUD_NOARP)) + continue; + + /* I'd get great pleasure deleting + this ugly code. Let's output it in hexadecimal format. + "arp" utility will eventually repaired --ANK + */ +#if 1 /* UGLY CODE */ +/* + * Convert hardware address to XX:XX:XX:XX ... form. + */ +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) + strcpy(hbuffer,ax2asc((ax25_address *)n->ha)); + else { +#endif + for (k=0,j=0;k<HBUFFERLEN-3 && j<dev->addr_len;j++) { + hbuffer[k++]=hexbuf[(n->ha[j]>>4)&15 ]; + hbuffer[k++]=hexbuf[n->ha[j]&15 ]; + hbuffer[k++]=':'; + } + hbuffer[--k]=0; + +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + } +#endif +#else + if ((neigh->nud_state&NUD_VALID) && dev->addr_len) { + int j; + for (j=0; j < dev->addr_len; j++) + sprintf(hbuffer+2*j, "%02x", neigh->ha[j]); + } else + sprintf(hbuffer, "0"); +#endif + + size = sprintf(buffer+len, + "%-17s0x%-10x0x%-10x%s", + in_ntoa(*(u32*)n->primary_key), + hatype, + arp_state_to_flags(n), + hbuffer); + size += sprintf(buffer+len+size, + " %-17s %s\n", + "*", dev->name); + + len += size; + pos += size; + + if (pos <= offset) + len=0; + if (pos >= offset+length) + goto done; + } + } + + for (i=0; i<=PNEIGH_HASHMASK; i++) { + struct pneigh_entry *n; + for (n=arp_tbl.phash_buckets[i]; n; n=n->next) { + struct device *dev = n->dev; + int hatype = dev ? dev->type : 0; + + size = sprintf(buffer+len, + "%-17s0x%-10x0x%-10x%s", + in_ntoa(*(u32*)n->key), + hatype, + ATF_PUBL|ATF_PERM, + "00:00:00:00:00:00"); + size += sprintf(buffer+len+size, + " %-17s %s\n", + "*", dev ? dev->name : "*"); + + len += size; + pos += size; + + if (pos <= offset) + len=0; + if (pos >= offset+length) + goto done; + } + } + +done: + neigh_table_unlock(&arp_tbl); + + *start = buffer+len-(pos-offset); /* Start of wanted data */ + len = pos-offset; /* Start slop */ + if (len>length) + len = length; /* Ending slop */ + if (len<0) + len = 0; + return len; +} +#endif + +/* Note, that it is not on notifier chain. + It is necessary, that this routine was called after route cache will be + flushed. + */ +void arp_ifdown(struct device *dev) +{ + neigh_ifdown(&arp_tbl, dev); +} + + +/* + * Called once on startup. 
+ */ + +static struct packet_type arp_packet_type = +{ + __constant_htons(ETH_P_ARP), + NULL, /* All devices */ + arp_rcv, + NULL, + NULL +}; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_arp = { + PROC_NET_ARP, 3, "arp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + arp_get_info +}; +#endif + +__initfunc(void arp_init (void)) +{ + neigh_table_init(&arp_tbl); + + dev_add_pack(&arp_packet_type); + +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_arp); +#endif +#ifdef CONFIG_SYSCTL + neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4"); +#endif +} + + +#ifdef CONFIG_AX25_MODULE + +/* + * ax25 -> ASCII conversion + */ +char *ax2asc(ax25_address *a) +{ + static char buf[11]; + char c, *s; + int n; + + for (n = 0, s = buf; n < 6; n++) { + c = (a->ax25_call[n] >> 1) & 0x7F; + + if (c != ' ') *s++ = c; + } + + *s++ = '-'; + + if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { + *s++ = '1'; + n -= 10; + } + + *s++ = n + '0'; + *s++ = '\0'; + + if (*buf == '\0' || *buf == '-') + return "*"; + + return buf; + +} + +#endif diff --git a/pfinet/linux-src/net/ipv4/devinet.c b/pfinet/linux-src/net/ipv4/devinet.c new file mode 100644 index 00000000..a50ee3bd --- /dev/null +++ b/pfinet/linux-src/net/ipv4/devinet.c @@ -0,0 +1,1034 @@ +/* + * NET3 IP device support routines. + * + * Version: $Id: devinet.c,v 1.28.2.2 1999/08/07 10:56:18 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Derived from the IP parts of dev.c 1.0.19 + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * + * Additional Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * Changes: + * Alexey Kuznetsov: pa_* fields are replaced with ifaddr lists. 
+ * Cyrus Durgin: updated for kmod + */ + +#include <linux/config.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif + +#include <net/ip.h> +#include <net/route.h> +#include <net/ip_fib.h> + +struct ipv4_devconf ipv4_devconf = { 1, 1, 1, 1, 0, }; +static struct ipv4_devconf ipv4_devconf_dflt = { 1, 1, 1, 1, 1, }; + +#ifdef CONFIG_RTNETLINK +static void rtmsg_ifa(int event, struct in_ifaddr *); +#else +#define rtmsg_ifa(a,b) do { } while(0) +#endif + +static struct notifier_block *inetaddr_chain; +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); +#ifdef CONFIG_SYSCTL +static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf *p); +static void devinet_sysctl_unregister(struct ipv4_devconf *p); +#endif + +int inet_ifa_count; +int inet_dev_count; + +static struct in_ifaddr * inet_alloc_ifa(void) +{ + struct in_ifaddr *ifa; + + ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); + if (ifa) { + memset(ifa, 0, sizeof(*ifa)); + inet_ifa_count++; + } + + return ifa; +} + +static __inline__ void inet_free_ifa(struct in_ifaddr *ifa) +{ + kfree_s(ifa, sizeof(*ifa)); + inet_ifa_count--; +} + +struct in_device *inetdev_init(struct device *dev) +{ + struct in_device *in_dev; + + if (dev->mtu < 68) + return NULL; + + in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); + if (!in_dev) + return NULL; + inet_dev_count++; + memset(in_dev, 0, sizeof(*in_dev)); + memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); + in_dev->cnf.sysctl = NULL; + in_dev->dev = dev; + if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) { + kfree(in_dev); + return NULL; + } +#ifdef CONFIG_SYSCTL + neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4"); +#endif + dev->ip_ptr = in_dev; +#ifdef CONFIG_SYSCTL + devinet_sysctl_register(in_dev, &in_dev->cnf); +#endif + if (dev->flags&IFF_UP) + ip_mc_up(in_dev); + return in_dev; +} + +static void inetdev_destroy(struct in_device *in_dev) +{ + struct in_ifaddr *ifa; + + ip_mc_destroy_dev(in_dev); + + while ((ifa = in_dev->ifa_list) != NULL) { + inet_del_ifa(in_dev, &in_dev->ifa_list, 0); + inet_free_ifa(ifa); + } + +#ifdef CONFIG_SYSCTL + devinet_sysctl_unregister(&in_dev->cnf); +#endif + in_dev->dev->ip_ptr = NULL; + synchronize_bh(); + neigh_parms_release(&arp_tbl, in_dev->arp_parms); + kfree(in_dev); +} + +struct in_ifaddr * inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) +{ + for_primary_ifa(in_dev) { + if (inet_ifa_match(a, ifa)) { + if (!b || inet_ifa_match(b, ifa)) + return ifa; + } + } endfor_ifa(in_dev); + return NULL; +} + +static void +inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) +{ + struct in_ifaddr *ifa1 = *ifap; + + /* 1. 
Deleting primary ifaddr forces deletion all secondaries */ + + if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) { + struct in_ifaddr *ifa; + struct in_ifaddr **ifap1 = &ifa1->ifa_next; + + while ((ifa=*ifap1) != NULL) { + if (!(ifa->ifa_flags&IFA_F_SECONDARY) || + ifa1->ifa_mask != ifa->ifa_mask || + !inet_ifa_match(ifa1->ifa_address, ifa)) { + ifap1 = &ifa->ifa_next; + continue; + } + *ifap1 = ifa->ifa_next; + synchronize_bh(); + + rtmsg_ifa(RTM_DELADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); + inet_free_ifa(ifa); + } + } + + /* 2. Unlink it */ + + *ifap = ifa1->ifa_next; + synchronize_bh(); + + /* 3. Announce address deletion */ + + /* Send message first, then call notifier. + At first sight, FIB update triggered by notifier + will refer to already deleted ifaddr, that could confuse + netlink listeners. It is not true: look, gated sees + that route deleted and if it still thinks that ifaddr + is valid, it will try to restore deleted routes... Grr. + So that, this order is correct. + */ + rtmsg_ifa(RTM_DELADDR, ifa1); + notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); + if (destroy) { + inet_free_ifa(ifa1); + if (in_dev->ifa_list == NULL) + inetdev_destroy(in_dev); + } +} + +static int +inet_insert_ifa(struct in_device *in_dev, struct in_ifaddr *ifa) +{ + struct in_ifaddr *ifa1, **ifap, **last_primary; + + if (ifa->ifa_local == 0) { + inet_free_ifa(ifa); + return 0; + } + + ifa->ifa_flags &= ~IFA_F_SECONDARY; + last_primary = &in_dev->ifa_list; + + for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) { + if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) + last_primary = &ifa1->ifa_next; + if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { + if (ifa1->ifa_local == ifa->ifa_local) { + inet_free_ifa(ifa); + return -EEXIST; + } + if (ifa1->ifa_scope != ifa->ifa_scope) { + inet_free_ifa(ifa); + return -EINVAL; + } + ifa->ifa_flags |= IFA_F_SECONDARY; + } + } + + if (!(ifa->ifa_flags&IFA_F_SECONDARY)) { + net_srandom(ifa->ifa_local); + ifap = last_primary; + } + + ifa->ifa_next = *ifap; + wmb(); + *ifap = ifa; + + /* Send message first, then call notifier. 
+ Notifier will trigger FIB update, so that + listeners of netlink will know about new ifaddr */ + rtmsg_ifa(RTM_NEWADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); + + return 0; +} + +static int +inet_set_ifa(struct device *dev, struct in_ifaddr *ifa) +{ + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) { + in_dev = inetdev_init(dev); + if (in_dev == NULL) { + inet_free_ifa(ifa); + return -ENOBUFS; + } + } + ifa->ifa_dev = in_dev; + if (LOOPBACK(ifa->ifa_local)) + ifa->ifa_scope = RT_SCOPE_HOST; + return inet_insert_ifa(in_dev, ifa); +} + +struct in_device *inetdev_by_index(int ifindex) +{ + struct device *dev; + dev = dev_get_by_index(ifindex); + if (dev) + return dev->ip_ptr; + return NULL; +} + +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask) +{ + for_primary_ifa(in_dev) { + if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) + return ifa; + } endfor_ifa(in_dev); + return NULL; +} + +#ifdef CONFIG_RTNETLINK + +/* rtm_{add|del} functions are not reenterable, so that + this structure can be made static + */ + +int +inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct rtattr **rta = arg; + struct in_device *in_dev; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in_ifaddr *ifa, **ifap; + + if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) + return -EADDRNOTAVAIL; + + for (ifap=&in_dev->ifa_list; (ifa=*ifap)!=NULL; ifap=&ifa->ifa_next) { + if ((rta[IFA_LOCAL-1] && memcmp(RTA_DATA(rta[IFA_LOCAL-1]), &ifa->ifa_local, 4)) || + (rta[IFA_LABEL-1] && strcmp(RTA_DATA(rta[IFA_LABEL-1]), ifa->ifa_label)) || + (rta[IFA_ADDRESS-1] && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS-1]), ifa)))) + continue; + inet_del_ifa(in_dev, ifap, 1); + return 0; + } + + return -EADDRNOTAVAIL; +} + +int +inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct rtattr **rta = arg; + struct device *dev; + struct in_device *in_dev; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in_ifaddr *ifa; + + if (ifm->ifa_prefixlen > 32 || rta[IFA_LOCAL-1] == NULL) + return -EINVAL; + + if ((dev = dev_get_by_index(ifm->ifa_index)) == NULL) + return -ENODEV; + + if ((in_dev = dev->ip_ptr) == NULL) { + in_dev = inetdev_init(dev); + if (!in_dev) + return -ENOBUFS; + } + + if ((ifa = inet_alloc_ifa()) == NULL) + return -ENOBUFS; + + if (rta[IFA_ADDRESS-1] == NULL) + rta[IFA_ADDRESS-1] = rta[IFA_LOCAL-1]; + memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 4); + memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 4); + ifa->ifa_prefixlen = ifm->ifa_prefixlen; + ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); + if (rta[IFA_BROADCAST-1]) + memcpy(&ifa->ifa_broadcast, RTA_DATA(rta[IFA_BROADCAST-1]), 4); + if (rta[IFA_ANYCAST-1]) + memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST-1]), 4); + ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_scope = ifm->ifa_scope; + ifa->ifa_dev = in_dev; + if (rta[IFA_LABEL-1]) + memcpy(ifa->ifa_label, RTA_DATA(rta[IFA_LABEL-1]), IFNAMSIZ); + else + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + + return inet_insert_ifa(in_dev, ifa); +} + +#endif + +/* + * Determine a default network mask, based on the IP address. + */ + +static __inline__ int inet_abc_len(u32 addr) +{ + if (ZERONET(addr)) + return 0; + + addr = ntohl(addr); + if (IN_CLASSA(addr)) + return 8; + if (IN_CLASSB(addr)) + return 16; + if (IN_CLASSC(addr)) + return 24; + + /* + * Something else, probably a multicast. 
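+ *
+ *	 (Worked examples, addresses chosen for illustration only: 10.1.2.3
+ *	 is class A and yields 8, 172.16.0.1 is class B and yields 16,
+ *	 192.168.1.1 is class C and yields 24; a class D address such as
+ *	 224.0.0.1 falls through to the -1 below.)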
+ */ + + return -1; +} + + +int devinet_ioctl(unsigned int cmd, void *arg) +{ + struct ifreq ifr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + struct device *dev; +#ifdef CONFIG_IP_ALIAS + char *colon; +#endif + int exclusive = 0; + int ret = 0; + + /* + * Fetch the caller's info block into kernel space + */ + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + ifr.ifr_name[IFNAMSIZ-1] = 0; + +#ifdef CONFIG_IP_ALIAS + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; +#endif + +#ifdef CONFIG_KMOD + dev_load(ifr.ifr_name); +#endif + + switch(cmd) { + case SIOCGIFADDR: /* Get interface address */ + case SIOCGIFBRDADDR: /* Get the broadcast address */ + case SIOCGIFDSTADDR: /* Get the destination address */ + case SIOCGIFNETMASK: /* Get the netmask for the interface */ + /* Note that this ioctls will not sleep, + so that we do not impose a lock. + One day we will be forced to put shlock here (I mean SMP) + */ + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + break; + + case SIOCSIFFLAGS: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + rtnl_lock(); + exclusive = 1; + break; + case SIOCSIFADDR: /* Set interface address (and family) */ + case SIOCSIFBRDADDR: /* Set the broadcast address */ + case SIOCSIFDSTADDR: /* Set the destination address */ + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + if (sin->sin_family != AF_INET) + return -EINVAL; + rtnl_lock(); + exclusive = 1; + break; + default: + return -EINVAL; + } + + + if ((dev = dev_get(ifr.ifr_name)) == NULL) { + ret = -ENODEV; + goto done; + } + +#ifdef CONFIG_IP_ALIAS + if (colon) + *colon = ':'; +#endif + + if ((in_dev=dev->ip_ptr) != NULL) { + for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) + if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) + break; + } + + if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) { + ret = -EADDRNOTAVAIL; + goto done; + } + + switch(cmd) { + case SIOCGIFADDR: /* Get interface address */ + sin->sin_addr.s_addr = ifa->ifa_local; + goto rarok; + + case SIOCGIFBRDADDR: /* Get the broadcast address */ + sin->sin_addr.s_addr = ifa->ifa_broadcast; + goto rarok; + + case SIOCGIFDSTADDR: /* Get the destination address */ + sin->sin_addr.s_addr = ifa->ifa_address; + goto rarok; + + case SIOCGIFNETMASK: /* Get the netmask for the interface */ + sin->sin_addr.s_addr = ifa->ifa_mask; + goto rarok; + + case SIOCSIFFLAGS: +#ifdef CONFIG_IP_ALIAS + if (colon) { + if (ifa == NULL) { + ret = -EADDRNOTAVAIL; + break; + } + if (!(ifr.ifr_flags&IFF_UP)) + inet_del_ifa(in_dev, ifap, 1); + break; + } +#endif + ret = dev_change_flags(dev, ifr.ifr_flags); + break; + + case SIOCSIFADDR: /* Set interface address (and family) */ + if (inet_abc_len(sin->sin_addr.s_addr) < 0) { + ret = -EINVAL; + break; + } + + if (!ifa) { + if ((ifa = inet_alloc_ifa()) == NULL) { + ret = -ENOBUFS; + break; + } +#ifdef CONFIG_IP_ALIAS + if (colon) + memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + else +#endif + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + } else { + ret = 0; + if (ifa->ifa_local == sin->sin_addr.s_addr) + break; + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = 0; + ifa->ifa_anycast = 0; + } + + ifa->ifa_address = + ifa->ifa_local = sin->sin_addr.s_addr; + + if (!(dev->flags&IFF_POINTOPOINT)) { + ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); + ifa->ifa_mask = 
inet_make_mask(ifa->ifa_prefixlen); + if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31) + ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask; + } else { + ifa->ifa_prefixlen = 32; + ifa->ifa_mask = inet_make_mask(32); + } + ret = inet_set_ifa(dev, ifa); + break; + + case SIOCSIFBRDADDR: /* Set the broadcast address */ + if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = sin->sin_addr.s_addr; + inet_insert_ifa(in_dev, ifa); + } + break; + + case SIOCSIFDSTADDR: /* Set the destination address */ + if (ifa->ifa_address != sin->sin_addr.s_addr) { + if (inet_abc_len(sin->sin_addr.s_addr) < 0) { + ret = -EINVAL; + break; + } + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_address = sin->sin_addr.s_addr; + inet_insert_ifa(in_dev, ifa); + } + break; + + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + + /* + * The mask we set must be legal. + */ + if (bad_mask(sin->sin_addr.s_addr, 0)) { + ret = -EINVAL; + break; + } + + if (ifa->ifa_mask != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_mask = sin->sin_addr.s_addr; + ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); + inet_set_ifa(dev, ifa); + } + break; + } +done: + if (exclusive) + rtnl_unlock(); + return ret; + +rarok: + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; +} + +static int +inet_gifconf(struct device *dev, char *buf, int len) +{ + struct in_device *in_dev = dev->ip_ptr; + struct in_ifaddr *ifa; + struct ifreq ifr; + int done=0; + + if (in_dev==NULL || (ifa=in_dev->ifa_list)==NULL) + return 0; + + for ( ; ifa; ifa = ifa->ifa_next) { + if (!buf) { + done += sizeof(ifr); + continue; + } + if (len < (int) sizeof(ifr)) + return done; + memset(&ifr, 0, sizeof(struct ifreq)); + if (ifa->ifa_label) + strcpy(ifr.ifr_name, ifa->ifa_label); + else + strcpy(ifr.ifr_name, dev->name); + + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = AF_INET; + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; + + if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) + return -EFAULT; + buf += sizeof(struct ifreq); + len -= sizeof(struct ifreq); + done += sizeof(struct ifreq); + } + return done; +} + +u32 inet_select_addr(struct device *dev, u32 dst, int scope) +{ + u32 addr = 0; + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) + return 0; + + for_primary_ifa(in_dev) { + if (ifa->ifa_scope > scope) + continue; + if (!dst || inet_ifa_match(dst, ifa)) + return ifa->ifa_local; + if (!addr) + addr = ifa->ifa_local; + } endfor_ifa(in_dev); + + if (addr || scope >= RT_SCOPE_LINK) + return addr; + + /* Not loopback addresses on loopback should be preferred + in this case. It is importnat that lo is the first interface + in dev_base list. 
+ */ + for (dev=dev_base; dev; dev=dev->next) { + if ((in_dev=dev->ip_ptr) == NULL) + continue; + + for_primary_ifa(in_dev) { + if (ifa->ifa_scope <= scope) + return ifa->ifa_local; + } endfor_ifa(in_dev); + } + + return 0; +} + +/* + * Device notifier + */ + +int register_inetaddr_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&inetaddr_chain, nb); +} + +int unregister_inetaddr_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&inetaddr_chain,nb); +} + +static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + if (in_dev) + printk(KERN_DEBUG "inetdev_event: bug\n"); + dev->ip_ptr = NULL; + break; + case NETDEV_UP: + if (dev == &loopback_dev) { + struct in_ifaddr *ifa; + if ((ifa = inet_alloc_ifa()) != NULL) { + ifa->ifa_local = + ifa->ifa_address = htonl(INADDR_LOOPBACK); + ifa->ifa_prefixlen = 8; + ifa->ifa_mask = inet_make_mask(8); + ifa->ifa_dev = in_dev; + ifa->ifa_scope = RT_SCOPE_HOST; + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + inet_insert_ifa(in_dev, ifa); + } + } + ip_mc_up(in_dev); + break; + case NETDEV_DOWN: + ip_mc_down(in_dev); + break; + case NETDEV_CHANGEMTU: + if (dev->mtu >= 68) + break; + /* MTU falled under minimal IP mtu. Disable IP. */ + case NETDEV_UNREGISTER: + inetdev_destroy(in_dev); + break; + case NETDEV_CHANGENAME: + if (in_dev->ifa_list) { + struct in_ifaddr *ifa; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + /* Do not notify about label change, this event is + not interesting to applications using netlink. + */ + } + break; + } + + return NOTIFY_DONE; +} + +struct notifier_block ip_netdev_notifier={ + inetdev_event, + NULL, + 0 +}; + +#ifdef CONFIG_RTNETLINK + +static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = ifa->ifa_prefixlen; + ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_scope = ifa->ifa_scope; + ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (ifa->ifa_address) + RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); + if (ifa->ifa_local) + RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); + if (ifa->ifa_broadcast) + RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); + if (ifa->ifa_anycast) + RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); + if (ifa->ifa_label[0]) + RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx, ip_idx; + int s_idx, s_ip_idx; + struct device *dev; + struct in_device *in_dev; + struct in_ifaddr *ifa; + + s_idx = cb->args[0]; + s_ip_idx = ip_idx = cb->args[1]; + for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + if (idx < s_idx) + continue; + if (idx > s_idx) + s_ip_idx = 0; + if ((in_dev = dev->ip_ptr) == NULL) + continue; + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; + ifa = ifa->ifa_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 
RTM_NEWADDR) <= 0) + goto done; + } + } +done: + cb->args[0] = idx; + cb->args[1] = ip_idx; + + return skb->len; +} + +static void rtmsg_ifa(int event, struct in_ifaddr * ifa) +{ + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); + return; + } + if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); +} + + +static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +{ + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + + { inet_rtm_newaddr, NULL, }, + { inet_rtm_deladdr, NULL, }, + { NULL, inet_dump_ifaddr, }, + { NULL, NULL, }, + + { inet_rtm_newroute, NULL, }, + { inet_rtm_delroute, NULL, }, + { inet_rtm_getroute, inet_dump_fib, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + +#ifdef CONFIG_IP_MULTIPLE_TABLES + { inet_rtm_newrule, NULL, }, + { inet_rtm_delrule, NULL, }, + { NULL, inet_dump_rules, }, + { NULL, NULL, }, +#else + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, +#endif +}; + +#endif /* CONFIG_RTNETLINK */ + + +#ifdef CONFIG_SYSCTL + +void inet_forward_change() +{ + struct device *dev; + int on = ipv4_devconf.forwarding; + + ipv4_devconf.accept_redirects = !on; + ipv4_devconf_dflt.forwarding = on; + + for (dev = dev_base; dev; dev = dev->next) { + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev->cnf.forwarding = on; + } + + rt_cache_flush(0); + + ip_statistics.IpForwarding = on ? 
1 : 2; +} + +static +int devinet_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + int *valp = ctl->data; + int val = *valp; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp); + + if (write && *valp != val) { + if (valp == &ipv4_devconf.forwarding) + inet_forward_change(); + else if (valp != &ipv4_devconf_dflt.forwarding) + rt_cache_flush(0); + } + + return ret; +} + +static struct devinet_sysctl_table +{ + struct ctl_table_header *sysctl_header; + ctl_table devinet_vars[12]; + ctl_table devinet_dev[2]; + ctl_table devinet_conf_dir[2]; + ctl_table devinet_proto_dir[2]; + ctl_table devinet_root_dir[2]; +} devinet_sysctl = { + NULL, + {{NET_IPV4_CONF_FORWARDING, "forwarding", + &ipv4_devconf.forwarding, sizeof(int), 0644, NULL, + &devinet_sysctl_forward}, + {NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding", + &ipv4_devconf.mc_forwarding, sizeof(int), 0444, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects", + &ipv4_devconf.accept_redirects, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects", + &ipv4_devconf.secure_redirects, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_SHARED_MEDIA, "shared_media", + &ipv4_devconf.shared_media, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_RP_FILTER, "rp_filter", + &ipv4_devconf.rp_filter, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects", + &ipv4_devconf.send_redirects, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route", + &ipv4_devconf.accept_source_route, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_PROXY_ARP, "proxy_arp", + &ipv4_devconf.proxy_arp, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay", + &ipv4_devconf.bootp_relay, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_CONF_LOG_MARTIANS, "log_martians", + &ipv4_devconf.log_martians, sizeof(int), 0644, NULL, + &proc_dointvec}, + {0}}, + + {{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, devinet_sysctl.devinet_vars},{0}}, + {{NET_IPV4_CONF, "conf", NULL, 0, 0555, devinet_sysctl.devinet_dev},{0}}, + {{NET_IPV4, "ipv4", NULL, 0, 0555, devinet_sysctl.devinet_conf_dir},{0}}, + {{CTL_NET, "net", NULL, 0, 0555, devinet_sysctl.devinet_proto_dir},{0}} +}; + +static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf *p) +{ + int i; + struct device *dev = in_dev ? 
in_dev->dev : NULL; + struct devinet_sysctl_table *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t == NULL) + return; + memcpy(t, &devinet_sysctl, sizeof(*t)); + for (i=0; i<sizeof(t->devinet_vars)/sizeof(t->devinet_vars[0])-1; i++) { + t->devinet_vars[i].data += (char*)p - (char*)&ipv4_devconf; + t->devinet_vars[i].de = NULL; + } + if (dev) { + t->devinet_dev[0].procname = dev->name; + t->devinet_dev[0].ctl_name = dev->ifindex; + } else { + t->devinet_dev[0].procname = "default"; + t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; + } + t->devinet_dev[0].child = t->devinet_vars; + t->devinet_dev[0].de = NULL; + t->devinet_conf_dir[0].child = t->devinet_dev; + t->devinet_conf_dir[0].de = NULL; + t->devinet_proto_dir[0].child = t->devinet_conf_dir; + t->devinet_proto_dir[0].de = NULL; + t->devinet_root_dir[0].child = t->devinet_proto_dir; + t->devinet_root_dir[0].de = NULL; + + t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0); + if (t->sysctl_header == NULL) + kfree(t); + else + p->sysctl = t; +} + +static void devinet_sysctl_unregister(struct ipv4_devconf *p) +{ + if (p->sysctl) { + struct devinet_sysctl_table *t = p->sysctl; + p->sysctl = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t); + } +} +#endif + +__initfunc(void devinet_init(void)) +{ + register_gifconf(PF_INET, inet_gifconf); + register_netdevice_notifier(&ip_netdev_notifier); +#ifdef CONFIG_RTNETLINK + rtnetlink_links[PF_INET] = inet_rtnetlink_table; +#endif +#ifdef CONFIG_SYSCTL + devinet_sysctl.sysctl_header = + register_sysctl_table(devinet_sysctl.devinet_root_dir, 0); + devinet_sysctl_register(NULL, &ipv4_devconf_dflt); +#endif +} diff --git a/pfinet/linux-src/net/ipv4/fib_frontend.c b/pfinet/linux-src/net/ipv4/fib_frontend.c new file mode 100644 index 00000000..a1747048 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/fib_frontend.c @@ -0,0 +1,628 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: FIB frontend. + * + * Version: $Id: fib_frontend.c,v 1.15 1999/03/21 05:22:31 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/arp.h> +#include <net/ip_fib.h> + +#define FFprint(a...) 
printk(KERN_DEBUG a) + +#ifndef CONFIG_IP_MULTIPLE_TABLES + +#define RT_TABLE_MIN RT_TABLE_MAIN + +struct fib_table *local_table; +struct fib_table *main_table; + +#else + +#define RT_TABLE_MIN 1 + +struct fib_table *fib_tables[RT_TABLE_MAX+1]; + +struct fib_table *__fib_new_table(int id) +{ + struct fib_table *tb; + + tb = fib_hash_init(id); + if (!tb) + return NULL; + fib_tables[id] = tb; + return tb; +} + + +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + +void fib_flush(void) +{ + int flushed = 0; +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *tb; + int id; + + for (id = RT_TABLE_MAX; id>0; id--) { + if ((tb = fib_get_table(id))==NULL) + continue; + flushed += tb->tb_flush(tb); + } +#else /* CONFIG_IP_MULTIPLE_TABLES */ + flushed += main_table->tb_flush(main_table); + flushed += local_table->tb_flush(local_table); +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + if (flushed) + rt_cache_flush(-1); +} + + +#ifdef CONFIG_PROC_FS + +/* + * Called from the PROCfs module. This outputs /proc/net/route. + * + * It always works in backward compatibility mode. + * The format of the file is not supposed to be changed. + */ + +static int +fib_get_procinfo(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int first = offset/128; + char *ptr = buffer; + int count = (length+127)/128; + int len; + + *start = buffer + offset%128; + + if (--first < 0) { + sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT"); + --count; + ptr += 128; + first = 0; + } + + /* rtnl_shlock(); -- it is pointless at the moment --ANK */ + if (main_table && count > 0) { + int n = main_table->tb_get_info(main_table, ptr, first, count); + count -= n; + ptr += n*128; + } + /* rtnl_shunlock(); */ + len = ptr - *start; + if (len >= length) + return length; + if (len >= 0) + return len; + return 0; +} + +#endif /* CONFIG_PROC_FS */ + +/* + * Find the first device with a given source address. + */ + +struct device * ip_dev_find(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + + if (!local_table || local_table->tb_lookup(local_table, &key, &res) + || res.type != RTN_LOCAL) + return NULL; + + return FIB_RES_DEV(res); +} + +unsigned inet_addr_type(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + if (ZERONET(addr) || BADCLASS(addr)) + return RTN_BROADCAST; + if (MULTICAST(addr)) + return RTN_MULTICAST; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + + if (local_table) { + if (local_table->tb_lookup(local_table, &key, &res) == 0) + return res.type; + return RTN_UNICAST; + } + return RTN_BROADCAST; +} + +/* Given (packet source, input interface) and optional (dst, oif, tos): + - (main) check, that source is valid i.e. not broadcast or our local + address. + - figure out what "logical" interface this packet arrived + and calculate "specific destination" address. + - check, that packet arrived from expected physical interface. 
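+
+   (Illustration of the last check: with rp_filter enabled on the
+   receiving interface, a packet claiming source 10.1.1.5 that arrives
+   on eth1 is rejected when the FIB would route traffic to 10.1.1.5
+   out through eth0; the addresses and interface names are examples.)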
+ */ + +int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, + struct device *dev, u32 *spec_dst, u32 *itag) +{ + struct in_device *in_dev = dev->ip_ptr; + struct rt_key key; + struct fib_result res; + + key.dst = src; + key.src = dst; + key.tos = tos; + key.oif = 0; + key.iif = oif; + key.scope = RT_SCOPE_UNIVERSE; + + if (in_dev == NULL) + return -EINVAL; + if (fib_lookup(&key, &res)) + goto last_resort; + if (res.type != RTN_UNICAST) + return -EINVAL; + *spec_dst = FIB_RES_PREFSRC(res); + if (itag) + fib_combine_itag(itag, &res); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) +#else + if (FIB_RES_DEV(res) == dev) +#endif + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + + if (in_dev->ifa_list == NULL) + goto last_resort; + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + key.oif = dev->ifindex; + if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) { + *spec_dst = FIB_RES_PREFSRC(res); + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + } + return 0; + +last_resort: + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + *itag = 0; + return 0; +} + +#ifndef CONFIG_IP_NOSIOCRT + +/* + * Handle IP routing ioctl calls. These are used to manipulate the routing tables + */ + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct kern_rta rta; + struct rtentry r; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + + switch (cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&r, arg, sizeof(struct rtentry))) + return -EFAULT; + rtnl_lock(); + err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); + if (err == 0) { + if (cmd == SIOCDELRT) { + struct fib_table *tb = fib_get_table(req.rtm.rtm_table); + err = -ESRCH; + if (tb) + err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + } else { + struct fib_table *tb = fib_new_table(req.rtm.rtm_table); + err = -ENOBUFS; + if (tb) + err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + } + if (rta.rta_mx) + kfree(rta.rta_mx); + } + rtnl_unlock(); + return err; + } + return -EINVAL; +} + +#else + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + return -EINVAL; +} + +#endif + +#ifdef CONFIG_RTNETLINK + +static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) +{ + int i; + + for (i=1; i<=RTA_MAX; i++) { + struct rtattr *attr = rta[i-1]; + if (attr) { + if (RTA_PAYLOAD(attr) < 4) + return -EINVAL; + if (i != RTA_MULTIPATH && i != RTA_METRICS) + rta[i-1] = (struct rtattr*)RTA_DATA(attr); + } + } + return 0; +} + +int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + if (inet_check_attr(r, rta)) + return -EINVAL; + + tb = fib_get_table(r->rtm_table); + if (tb) + return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); + return -ESRCH; +} + +int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + if (inet_check_attr(r, rta)) + return -EINVAL; + + tb = fib_new_table(r->rtm_table); + if (tb) + return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); + return -ENOBUFS; +} + +int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct fib_table *tb; + + if (NLMSG_PAYLOAD(cb->nlh, 0) >= 
sizeof(struct rtmsg) && + ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return ip_rt_dump(skb, cb); + + s_t = cb->args[0]; + if (s_t == 0) + s_t = cb->args[0] = RT_TABLE_MIN; + + for (t=s_t; t<=RT_TABLE_MAX; t++) { + if (t < s_t) continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + if ((tb = fib_get_table(t))==NULL) + continue; + if (tb->tb_dump(tb, skb, cb) < 0) + break; + } + + cb->args[0] = t; + + return skb->len; +} + +#endif + +/* Prepare and feed intra-kernel routing request. + Really, it should be netlink message, but :-( netlink + can be not configured, so that we feed it directly + to fib engine. It is legal, because all events occur + only when netlink is already locked. + */ + +static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +{ + struct fib_table * tb; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + struct kern_rta rta; + + memset(&req.rtm, 0, sizeof(req.rtm)); + memset(&rta, 0, sizeof(rta)); + + if (type == RTN_UNICAST) + tb = fib_new_table(RT_TABLE_MAIN); + else + tb = fib_new_table(RT_TABLE_LOCAL); + + if (tb == NULL) + return; + + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = cmd; + req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = 0; + + req.rtm.rtm_dst_len = dst_len; + req.rtm.rtm_table = tb->tb_id; + req.rtm.rtm_protocol = RTPROT_KERNEL; + req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); + req.rtm.rtm_type = type; + + rta.rta_dst = &dst; + rta.rta_prefsrc = &ifa->ifa_local; + rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + + if (cmd == RTM_NEWROUTE) + tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + else + tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); +} + +static void fib_add_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *prim = ifa; + u32 mask = ifa->ifa_mask; + u32 addr = ifa->ifa_local; + u32 prefix = ifa->ifa_address&mask; + + if (ifa->ifa_flags&IFA_F_SECONDARY) { + prim = inet_ifa_byprefix(in_dev, prefix, mask); + if (prim == NULL) { + printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n"); + return; + } + } + + fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); + + if (!(dev->flags&IFF_UP)) + return; + + /* Add broadcast address, if it is explicitly assigned. */ + if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + + if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && + (prefix != addr || ifa->ifa_prefixlen < 32)) { + fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : + RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); + + /* Add network specific broadcasts, when it takes a sense */ + if (ifa->ifa_prefixlen < 31) { + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); + } + } +} + +static void fib_del_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *ifa1; + struct in_ifaddr *prim = ifa; + u32 brd = ifa->ifa_address|~ifa->ifa_mask; + u32 any = ifa->ifa_address&ifa->ifa_mask; +#define LOCAL_OK 1 +#define BRD_OK 2 +#define BRD0_OK 4 +#define BRD1_OK 8 + unsigned ok = 0; + + if (!(ifa->ifa_flags&IFA_F_SECONDARY)) + fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? 
RTN_LOCAL : + RTN_UNICAST, any, ifa->ifa_prefixlen, prim); + else { + prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); + if (prim == NULL) { + printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n"); + return; + } + } + + /* Deletion is more complicated than add. + We should take care of not to delete too much :-) + + Scan address list to be sure that addresses are really gone. + */ + + for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + if (ifa->ifa_local == ifa1->ifa_local) + ok |= LOCAL_OK; + if (ifa->ifa_broadcast == ifa1->ifa_broadcast) + ok |= BRD_OK; + if (brd == ifa1->ifa_broadcast) + ok |= BRD1_OK; + if (any == ifa1->ifa_broadcast) + ok |= BRD0_OK; + } + + if (!(ok&BRD_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + if (!(ok&BRD1_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + if (!(ok&BRD0_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + if (!(ok&LOCAL_OK)) { + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); + + /* Check, that this local address finally disappeared. */ + if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { + /* And the last, but not the least thing. + We must flush stray FIB entries. + + First of all, we scan fib_info list searching + for stray nexthop entries, then ignite fib_flush. + */ + if (fib_sync_down(ifa->ifa_local, NULL, 0)) + fib_flush(); + } + } +#undef LOCAL_OK +#undef BRD_OK +#undef BRD0_OK +#undef BRD1_OK +} + +static void fib_disable_ip(struct device *dev, int force) +{ + if (fib_sync_down(0, dev, force)) + fib_flush(); + rt_cache_flush(0); + arp_ifdown(dev); +} + +static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + + switch (event) { + case NETDEV_UP: + fib_add_ifaddr(ifa); + rt_cache_flush(-1); + break; + case NETDEV_DOWN: + if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { + /* Last address was deleted from this interface. + Disable IP. 
+ */ + fib_disable_ip(ifa->ifa_dev->dev, 1); + } else { + fib_del_ifaddr(ifa); + rt_cache_flush(-1); + } + break; + } + return NOTIFY_DONE; +} + +static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + struct in_device *in_dev = dev->ip_ptr; + + if (!in_dev) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + for_ifa(in_dev) { + fib_add_ifaddr(ifa); + } endfor_ifa(in_dev); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fib_sync_up(dev); +#endif + rt_cache_flush(-1); + break; + case NETDEV_DOWN: + fib_disable_ip(dev, 0); + break; + case NETDEV_UNREGISTER: + fib_disable_ip(dev, 1); + break; + case NETDEV_CHANGEMTU: + case NETDEV_CHANGE: + rt_cache_flush(0); + break; + } + return NOTIFY_DONE; +} + +struct notifier_block fib_inetaddr_notifier = { + fib_inetaddr_event, + NULL, + 0 +}; + +struct notifier_block fib_netdev_notifier = { + fib_netdev_event, + NULL, + 0 +}; + +__initfunc(void ip_fib_init(void)) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_ROUTE, 5, "route", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + fib_get_procinfo + }); +#endif /* CONFIG_PROC_FS */ + +#ifndef CONFIG_IP_MULTIPLE_TABLES + local_table = fib_hash_init(RT_TABLE_LOCAL); + main_table = fib_hash_init(RT_TABLE_MAIN); +#else + fib_rules_init(); +#endif + + register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); +} + diff --git a/pfinet/linux-src/net/ipv4/fib_hash.c b/pfinet/linux-src/net/ipv4/fib_hash.c new file mode 100644 index 00000000..d9e029ce --- /dev/null +++ b/pfinet/linux-src/net/ipv4/fib_hash.c @@ -0,0 +1,885 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 FIB: lookup engine and maintenance routines. + * + * Version: $Id: fib_hash.c,v 1.8 1999/03/25 10:04:17 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FTprint(a...) +/* + printk(KERN_DEBUG a) + */ + +/* + These bizarre types are just to force strict type checking. + When I reversed order of bytes and changed to natural mask lengths, + I forgot to make fixes in several places. Now I am lazy to return + it back. 
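+   (A sketch of the intent, as read from the code below: both are one-member
+   structs; fn_key_t carries a destination prefix, already masked to its zone
+   and kept in network byte order, while fn_hash_idx_t carries a hash bucket
+   index. Wrapping them this way makes the compiler reject accidental mixing
+   of the two.)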
+ */ + +typedef struct { + u32 datum; +} fn_key_t; + +typedef struct { + u32 datum; +} fn_hash_idx_t; + +struct fib_node +{ + struct fib_node *fn_next; + struct fib_info *fn_info; +#define FIB_INFO(f) ((f)->fn_info) + fn_key_t fn_key; + u8 fn_tos; + u8 fn_type; + u8 fn_scope; + u8 fn_state; +}; + +#define FN_S_ZOMBIE 1 +#define FN_S_ACCESSED 2 + +static int fib_hash_zombies; + +struct fn_zone +{ + struct fn_zone *fz_next; /* Next not empty zone */ + struct fib_node **fz_hash; /* Hash table pointer */ + int fz_nent; /* Number of entries */ + + int fz_divisor; /* Hash divisor */ + u32 fz_hashmask; /* (1<<fz_divisor) - 1 */ +#define FZ_HASHMASK(fz) ((fz)->fz_hashmask) + + int fz_order; /* Zone order */ + u32 fz_mask; +#define FZ_MASK(fz) ((fz)->fz_mask) +}; + +/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask + can be cheaper than memory lookup, so that FZ_* macros are used. + */ + +struct fn_hash +{ + struct fn_zone *fn_zones[33]; + struct fn_zone *fn_zone_list; +}; + +static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz) +{ + u32 h = ntohl(key.datum)>>(32 - fz->fz_order); + h ^= (h>>20); + h ^= (h>>10); + h ^= (h>>5); + h &= FZ_HASHMASK(fz); + return *(fn_hash_idx_t*)&h; +} + +#define fz_key_0(key) ((key).datum = 0) +#define fz_prefix(key,fz) ((key).datum) + +static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz) +{ + fn_key_t k; + k.datum = dst & FZ_MASK(fz); + return k; +} + +static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz) +{ + return &fz->fz_hash[fn_hash(key, fz).datum]; +} + +static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz) +{ + return fz->fz_hash[fn_hash(key, fz).datum]; +} + +extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b) +{ + return a.datum == b.datum; +} + +extern __inline__ int fn_key_leq(fn_key_t a, fn_key_t b) +{ + return a.datum <= b.datum; +} + +#define FZ_MAX_DIVISOR 1024 + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + +static __inline__ void fn_rebuild_zone(struct fn_zone *fz, + struct fib_node **old_ht, + int old_divisor) +{ + int i; + struct fib_node *f, **fp, *next; + + for (i=0; i<old_divisor; i++) { + for (f=old_ht[i]; f; f=next) { + next = f->fn_next; + for (fp = fz_chain_p(f->fn_key, fz); + *fp && fn_key_leq((*fp)->fn_key, f->fn_key); + fp = &(*fp)->fn_next) + /* NONE */; + f->fn_next = *fp; + *fp = f; + } + } +} + +static void fn_rehash_zone(struct fn_zone *fz) +{ + struct fib_node **ht, **old_ht; + int old_divisor, new_divisor; + u32 new_hashmask; + + old_divisor = fz->fz_divisor; + + switch (old_divisor) { + case 16: + new_divisor = 256; + new_hashmask = 0xFF; + break; + case 256: + new_divisor = 1024; + new_hashmask = 0x3FF; + break; + default: + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } +#if RT_CACHE_DEBUG >= 2 + printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); +#endif + + ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL); + + if (ht) { + memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + start_bh_atomic(); + old_ht = fz->fz_hash; + fz->fz_hash = ht; + fz->fz_hashmask = new_hashmask; + fz->fz_divisor = new_divisor; + fn_rebuild_zone(fz, old_ht, old_divisor); + end_bh_atomic(); + kfree(old_ht); + } +} +#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ + +static void fn_free_node(struct fib_node * f) +{ + fib_release_info(FIB_INFO(f)); + kfree_s(f, sizeof(struct fib_node)); +} + + +static struct fn_zone * +fn_new_zone(struct fn_hash *table, int z) +{ + int i; + struct fn_zone *fz 
= kmalloc(sizeof(struct fn_zone), GFP_KERNEL); + if (!fz) + return NULL; + + memset(fz, 0, sizeof(struct fn_zone)); + if (z) { + fz->fz_divisor = 16; + fz->fz_hashmask = 0xF; + } else { + fz->fz_divisor = 1; + fz->fz_hashmask = 0; + } + fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + if (!fz->fz_hash) { + kfree(fz); + return NULL; + } + memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*)); + fz->fz_order = z; + fz->fz_mask = inet_make_mask(z); + + /* Find the first not empty zone with more specific mask */ + for (i=z+1; i<=32; i++) + if (table->fn_zones[i]) + break; + if (i>32) { + /* No more specific masks, we are the first. */ + fz->fz_next = table->fn_zone_list; + table->fn_zone_list = fz; + } else { + fz->fz_next = table->fn_zones[i]->fz_next; + table->fn_zones[i]->fz_next = fz; + } + table->fn_zones[z] = fz; + return fz; +} + +static int +fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fn_zone *fz; + struct fn_hash *t = (struct fn_hash*)tb->tb_data; + + for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { + struct fib_node *f; + fn_key_t k = fz_key(key->dst, fz); + + for (f = fz_chain(k, fz); f; f = f->fn_next) { + if (!fn_key_eq(k, f->fn_key)) { + if (fn_key_leq(k, f->fn_key)) + break; + else + continue; + } +#ifdef CONFIG_IP_ROUTE_TOS + if (f->fn_tos && f->fn_tos != key->tos) + continue; +#endif + f->fn_state |= FN_S_ACCESSED; + + if (f->fn_state&FN_S_ZOMBIE) + continue; + if (f->fn_scope < key->scope) + continue; + + err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res); + if (err == 0) { + res->type = f->fn_type; + res->scope = f->fn_scope; + res->prefixlen = fz->fz_order; + res->prefix = &fz_prefix(f->fn_key, fz); + return 0; + } + if (err < 0) + return err; + } + } + return 1; +} + +static int fn_hash_last_dflt=-1; + +static int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx) +{ + struct neighbour *n; + int state = NUD_NONE; + + n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); + if (n) { + state = n->nud_state; + neigh_release(n); + } + if (state==NUD_REACHABLE) + return 0; + if ((state&NUD_VALID) && order != fn_hash_last_dflt) + return 0; + if ((state&NUD_VALID) || + (*last_idx<0 && order > fn_hash_last_dflt)) { + *last_resort = fi; + *last_idx = order; + } + return 1; +} + +static void +fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res) +{ + int order, last_idx; + struct fib_node *f; + struct fib_info *fi = NULL; + struct fib_info *last_resort; + struct fn_hash *t = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz = t->fn_zones[0]; + + if (fz == NULL) + return; + + last_idx = -1; + last_resort = NULL; + order = -1; + + for (f = fz->fz_hash[0]; f; f = f->fn_next) { + struct fib_info *next_fi = FIB_INFO(f); + + if ((f->fn_state&FN_S_ZOMBIE) || + f->fn_scope != res->scope || + f->fn_type != RTN_UNICAST) + continue; + + if (next_fi->fib_priority > res->fi->fib_priority) + break; + if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + continue; + f->fn_state |= FN_S_ACCESSED; + + if (fi == NULL) { + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) { + res->fi = fi; + fn_hash_last_dflt = order; + return; + } + fi = next_fi; + order++; + } + + if (order<=0 || fi==NULL) { + fn_hash_last_dflt = -1; + return; + } + + if (!fib_detect_death(fi, order, &last_resort, &last_idx)) { + res->fi = fi; + 
fn_hash_last_dflt = order; + return; + } + + if (last_idx >= 0) + res->fi = last_resort; + fn_hash_last_dflt = last_idx; +} + +#define FIB_SCAN(f, fp) \ +for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) + +#define FIB_SCAN_KEY(f, fp, key) \ +for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) + +#ifndef CONFIG_IP_ROUTE_TOS +#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key) +#else +#define FIB_SCAN_TOS(f, fp, key, tos) \ +for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \ + (f)->fn_tos == (tos) ; (fp) = &(f)->fn_next) +#endif + + +#ifdef CONFIG_RTNETLINK +static void rtmsg_fib(int, struct fib_node*, int, int, + struct nlmsghdr *n, + struct netlink_skb_parms *); +#else +#define rtmsg_fib(a, b, c, d, e, f) +#endif + + +static int +fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node *new_f, *f, **fp, **del_fp; + struct fn_zone *fz; + struct fib_info *fi; + + int z = r->rtm_dst_len; + int type = r->rtm_type; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + fn_key_t key; + int err; + +FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ? +*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1, +rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0); + if (z > 32) + return -EINVAL; + fz = table->fn_zones[z]; + if (!fz && !(fz = fn_new_zone(table, z))) + return -ENOBUFS; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + if ((fi = fib_create_info(r, rta, n, &err)) == NULL) + return err; + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + if (fz->fz_nent > (fz->fz_divisor<<2) && + fz->fz_divisor < FZ_MAX_DIVISOR && + (z==32 || (1<<z) > fz->fz_divisor)) + fn_rehash_zone(fz); +#endif + + fp = fz_chain_p(key, fz); + + /* + * Scan list to find the first route with the same destination + */ + FIB_SCAN(f, fp) { + if (fn_key_leq(key,f->fn_key)) + break; + } + +#ifdef CONFIG_IP_ROUTE_TOS + /* + * Find route with the same destination and tos. + */ + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos <= tos) + break; + } +#endif + + del_fp = NULL; + + if (f && (f->fn_state&FN_S_ZOMBIE) && +#ifdef CONFIG_IP_ROUTE_TOS + f->fn_tos == tos && +#endif + fn_key_eq(f->fn_key, key)) { + del_fp = fp; + fp = &f->fn_next; + f = *fp; + goto create; + } + + FIB_SCAN_TOS(f, fp, key, tos) { + if (fi->fib_priority <= FIB_INFO(f)->fib_priority) + break; + } + + /* Now f==*fp points to the first node with the same + keys [prefix,tos,priority], if such key already + exists or to the node, before which we will insert new one. 
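+	   (In other words, the chain is kept ordered: keys ascend, tos values
+	   descend within a key, and priorities ascend within a (key,tos) pair,
+	   which is what the three FIB_SCAN loops above rely on.)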
+ */ + + if (f && +#ifdef CONFIG_IP_ROUTE_TOS + f->fn_tos == tos && +#endif + fn_key_eq(f->fn_key, key) && + fi->fib_priority == FIB_INFO(f)->fib_priority) { + struct fib_node **ins_fp; + + err = -EEXIST; + if (n->nlmsg_flags&NLM_F_EXCL) + goto out; + + if (n->nlmsg_flags&NLM_F_REPLACE) { + del_fp = fp; + fp = &f->fn_next; + f = *fp; + goto replace; + } + + ins_fp = fp; + err = -EEXIST; + + FIB_SCAN_TOS(f, fp, key, tos) { + if (fi->fib_priority != FIB_INFO(f)->fib_priority) + break; + if (f->fn_type == type && f->fn_scope == r->rtm_scope + && FIB_INFO(f) == fi) + goto out; + } + + if (!(n->nlmsg_flags&NLM_F_APPEND)) { + fp = ins_fp; + f = *fp; + } + } + +create: + err = -ENOENT; + if (!(n->nlmsg_flags&NLM_F_CREATE)) + goto out; + +replace: + err = -ENOBUFS; + new_f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL); + if (new_f == NULL) + goto out; + + memset(new_f, 0, sizeof(struct fib_node)); + + new_f->fn_key = key; +#ifdef CONFIG_IP_ROUTE_TOS + new_f->fn_tos = tos; +#endif + new_f->fn_type = type; + new_f->fn_scope = r->rtm_scope; + FIB_INFO(new_f) = fi; + + /* + * Insert new entry to the list. + */ + + new_f->fn_next = f; + *fp = new_f; + fz->fz_nent++; + + if (del_fp) { + f = *del_fp; + /* Unlink replaced node */ + *del_fp = f->fn_next; + synchronize_bh(); + + if (!(f->fn_state&FN_S_ZOMBIE)) + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + if (f->fn_state&FN_S_ACCESSED) + rt_cache_flush(-1); + fn_free_node(f); + fz->fz_nent--; + } else { + rt_cache_flush(-1); + } + rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req); + return 0; + +out: + fib_release_info(fi); + return err; +} + + +static int +fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node **fp, **del_fp, *f; + int z = r->rtm_dst_len; + struct fn_zone *fz; + fn_key_t key; + int matched; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + +FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ? + *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? 
*rta->rta_oif : -1); + if (z > 32) + return -EINVAL; + if ((fz = table->fn_zones[z]) == NULL) + return -ESRCH; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + fp = fz_chain_p(key, fz); + + FIB_SCAN(f, fp) { + if (fn_key_eq(f->fn_key, key)) + break; + if (fn_key_leq(key, f->fn_key)) + return -ESRCH; + } +#ifdef CONFIG_IP_ROUTE_TOS + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos == tos) + break; + } +#endif + + matched = 0; + del_fp = NULL; + FIB_SCAN_TOS(f, fp, key, tos) { + struct fib_info * fi = FIB_INFO(f); + + if (f->fn_state&FN_S_ZOMBIE) + return -ESRCH; + + matched++; + + if (del_fp == NULL && + (!r->rtm_type || f->fn_type == r->rtm_type) && + (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && + (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && + fib_nh_match(r, n, rta, fi) == 0) + del_fp = fp; + } + + if (del_fp) { + f = *del_fp; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + + if (matched != 1) { + *del_fp = f->fn_next; + synchronize_bh(); + + if (f->fn_state&FN_S_ACCESSED) + rt_cache_flush(-1); + fn_free_node(f); + fz->fz_nent--; + } else { + f->fn_state |= FN_S_ZOMBIE; + if (f->fn_state&FN_S_ACCESSED) { + f->fn_state &= ~FN_S_ACCESSED; + rt_cache_flush(-1); + } + if (++fib_hash_zombies > 128) + fib_flush(); + } + + return 0; + } + return -ESRCH; +} + +extern __inline__ int +fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table) +{ + int found = 0; + struct fib_node *f; + + while ((f = *fp) != NULL) { + struct fib_info *fi = FIB_INFO(f); + + if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) { + *fp = f->fn_next; + synchronize_bh(); + + fn_free_node(f); + found++; + continue; + } + fp = &f->fn_next; + } + return found; +} + +static int fn_hash_flush(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int found = 0; + + fib_hash_zombies = 0; + for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + int tmp = 0; + for (i=fz->fz_divisor-1; i>=0; i--) + tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table); + fz->fz_nent -= tmp; + found += tmp; + } + return found; +} + + +#ifdef CONFIG_PROC_FS + +static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int pos = 0; + int n = 0; + + for (fz=table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + struct fib_node *f; + int maxslot = fz->fz_divisor; + struct fib_node **fp = fz->fz_hash; + + if (fz->fz_nent == 0) + continue; + + if (pos + fz->fz_nent <= first) { + pos += fz->fz_nent; + continue; + } + + for (i=0; i < maxslot; i++, fp++) { + for (f = *fp; f; f = f->fn_next) { + if (++pos <= first) + continue; + fib_node_get_info(f->fn_type, + f->fn_state&FN_S_ZOMBIE, + FIB_INFO(f), + fz_prefix(f->fn_key, fz), + FZ_MASK(fz), buffer); + buffer += 128; + if (++n >= count) + return n; + } + } + } + return n; +} +#endif + + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int +fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz, + struct fib_node *f) +{ + int i, s_i; + + s_i = cb->args[3]; + for (i=0; f; i++, f=f->fn_next) { + if (i < s_i) continue; + if (f->fn_state&FN_S_ZOMBIE) continue; + if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + RTM_NEWROUTE, + tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 
0 : f->fn_type, f->fn_scope, + &f->fn_key, fz->fz_order, f->fn_tos, + f->fn_info) < 0) { + cb->args[3] = i; + return -1; + } + } + cb->args[3] = i; + return skb->len; +} + +extern __inline__ int +fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz) +{ + int h, s_h; + + s_h = cb->args[2]; + for (h=0; h < fz->fz_divisor; h++) { + if (h < s_h) continue; + if (h > s_h) + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); + if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL) + continue; + if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) { + cb->args[2] = h; + return -1; + } + } + cb->args[2] = h; + return skb->len; +} + +static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) +{ + int m, s_m; + struct fn_zone *fz; + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + + s_m = cb->args[1]; + for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { + if (m < s_m) continue; + if (m > s_m) + memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); + if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { + cb->args[1] = m; + return -1; + } + } + cb->args[1] = m; + return skb->len; +} + +static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct sk_buff *skb; + u32 pid = req ? req->pid : 0; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos, + FIB_INFO(f)) < 0) { + kfree_skb(skb); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + if (n->nlmsg_flags&NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + if (n->nlmsg_flags&NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); +} + +#endif /* CONFIG_RTNETLINK */ + +#ifdef CONFIG_IP_MULTIPLE_TABLES +struct fib_table * fib_hash_init(int id) +#else +__initfunc(struct fib_table * fib_hash_init(int id)) +#endif +{ + struct fib_table *tb; + tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL); + if (tb == NULL) + return NULL; + tb->tb_id = id; + tb->tb_lookup = fn_hash_lookup; + tb->tb_insert = fn_hash_insert; + tb->tb_delete = fn_hash_delete; + tb->tb_flush = fn_hash_flush; + tb->tb_select_default = fn_hash_select_default; +#ifdef CONFIG_RTNETLINK + tb->tb_dump = fn_hash_dump; +#endif +#ifdef CONFIG_PROC_FS + tb->tb_get_info = fn_hash_get_info; +#endif + memset(tb->tb_data, 0, sizeof(struct fn_hash)); + return tb; +} diff --git a/pfinet/linux-src/net/ipv4/fib_rules.c b/pfinet/linux-src/net/ipv4/fib_rules.c new file mode 100644 index 00000000..868c44c3 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/fib_rules.c @@ -0,0 +1,419 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: policy rules. + * + * Version: $Id: fib_rules.c,v 1.9 1999/03/25 10:04:23 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Fixes: + * Rani Assaf : local_rule cannot be deleted + * Marc Boucher : routing by fwmark + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FRprintk(a...) + +struct fib_rule +{ + struct fib_rule *r_next; + u32 r_preference; + unsigned char r_table; + unsigned char r_action; + unsigned char r_dst_len; + unsigned char r_src_len; + u32 r_src; + u32 r_srcmask; + u32 r_dst; + u32 r_dstmask; + u32 r_srcmap; + u8 r_flags; + u8 r_tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + u32 r_fwmark; +#endif + int r_ifindex; +#ifdef CONFIG_NET_CLS_ROUTE + __u32 r_tclassid; +#endif + char r_ifname[IFNAMSIZ]; +}; + +static struct fib_rule default_rule = { NULL, 0x7FFF, RT_TABLE_DEFAULT, RTN_UNICAST, }; +static struct fib_rule main_rule = { &default_rule, 0x7FFE, RT_TABLE_MAIN, RTN_UNICAST, }; +static struct fib_rule local_rule = { &main_rule, 0, RT_TABLE_LOCAL, RTN_UNICAST, }; + +static struct fib_rule *fib_rules = &local_rule; + +int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, **rp; + + for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { + if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && + (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && + rtm->rtm_tos == r->r_tos && +#ifdef CONFIG_IP_ROUTE_FWMARK + (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && +#endif + (!rtm->rtm_type || rtm->rtm_type == r->r_action) && + (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && + (!rta[RTA_IIF-1] || strcmp(RTA_DATA(rta[RTA_IIF-1]), r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { + if (r == &local_rule) + return -EPERM; + + *rp = r->r_next; + synchronize_bh(); + + if (r != &default_rule && r != &main_rule) + kfree(r); + return 0; + } + } + return -ESRCH; +} + +/* Allocate new unique table id */ + +static struct fib_table *fib_empty_table(void) +{ + int id; + + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_tables[id] == NULL) + return __fib_new_table(id); + return NULL; +} + + +int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, *new_r, **rp; + unsigned char table_id; + + if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || + (rtm->rtm_tos & ~IPTOS_TOS_MASK)) + return -EINVAL; + + if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) + return -EINVAL; + + table_id = rtm->rtm_table; + if (table_id == RT_TABLE_UNSPEC) { + struct fib_table *table; + if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) { + if ((table = fib_empty_table()) == NULL) + return -ENOBUFS; + table_id = table->tb_id; + } + } + + 
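+	/* Build the new rule from the rtnetlink attributes: the selectors
+	   (source/destination prefixes, tos, optional fwmark and input
+	   interface), the action, the preference and the table chosen above.
+	   For illustration only: a request of this shape is what a hypothetical
+	   "ip rule add from 10.0.0.0/8 table 100 pref 1000" would encode as an
+	   RTM_NEWRULE message. */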
new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; + memset(new_r, 0, sizeof(*new_r)); + if (rta[RTA_SRC-1]) + memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); + if (rta[RTA_DST-1]) + memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); + if (rta[RTA_GATEWAY-1]) + memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); + new_r->r_src_len = rtm->rtm_src_len; + new_r->r_dst_len = rtm->rtm_dst_len; + new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); + new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); + new_r->r_tos = rtm->rtm_tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + if (rta[RTA_PROTOINFO-1]) + memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); +#endif + new_r->r_action = rtm->rtm_type; + new_r->r_flags = rtm->rtm_flags; + if (rta[RTA_PRIORITY-1]) + memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + new_r->r_table = table_id; + if (rta[RTA_IIF-1]) { + struct device *dev; + memcpy(new_r->r_ifname, RTA_DATA(rta[RTA_IIF-1]), IFNAMSIZ); + new_r->r_ifname[IFNAMSIZ-1] = 0; + new_r->r_ifindex = -1; + dev = dev_get(new_r->r_ifname); + if (dev) + new_r->r_ifindex = dev->ifindex; + } +#ifdef CONFIG_NET_CLS_ROUTE + if (rta[RTA_FLOW-1]) + memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); +#endif + + rp = &fib_rules; + if (!new_r->r_preference) { + r = fib_rules; + if (r && (r = r->r_next) != NULL) { + rp = &fib_rules->r_next; + if (r->r_preference) + new_r->r_preference = r->r_preference - 1; + } + } + + while ( (r = *rp) != NULL ) { + if (r->r_preference > new_r->r_preference) + break; + rp = &r->r_next; + } + + new_r->r_next = r; + *rp = new_r; + return 0; +} + +u32 fib_rules_map_destination(u32 daddr, struct fib_result *res) +{ + u32 mask = inet_make_mask(res->prefixlen); + return (daddr&~mask)|res->fi->fib_nh->nh_gw; +} + +u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags) +{ + struct fib_rule *r = res->r; + + if (r->r_action == RTN_NAT) { + int addrtype = inet_addr_type(r->r_srcmap); + + if (addrtype == RTN_NAT) { + /* Packet is from translated source; remember it */ + saddr = (saddr&~r->r_srcmask)|r->r_srcmap; + *flags |= RTCF_SNAT; + } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { + /* Packet is from masqueraded source; remember it */ + saddr = r->r_srcmap; + *flags |= RTCF_MASQ; + } + } + return saddr; +} + +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) +{ + if (res->r) + return res->r->r_tclassid; + return 0; +} +#endif + + +static void fib_rules_detach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == dev->ifindex) + r->r_ifindex = -1; + } +} + +static void fib_rules_attach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) + r->r_ifindex = dev->ifindex; + } +} + +int fib_lookup(const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fib_rule *r, *policy; + struct fib_table *tb; + + u32 daddr = key->dst; + u32 saddr = key->src; + +FRprintk("Lookup: %08x <- %08x ", key->dst, key->src); + for (r = fib_rules; r; r=r->r_next) { + if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || +#ifdef CONFIG_IP_ROUTE_TOS + (r->r_tos && r->r_tos != key->tos) || +#endif +#ifdef CONFIG_IP_ROUTE_FWMARK + (r->r_fwmark && r->r_fwmark != key->fwmark) || +#endif + (r->r_ifindex && r->r_ifindex != key->iif)) + continue; + +FRprintk("tb %d r %d ", r->r_table, r->r_action); + switch (r->r_action) { + 
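+		/* Translate the matching rule's action into a lookup verdict:
+		   unicast and NAT rules proceed to the table lookup below, while
+		   the remaining actions terminate the lookup with the
+		   corresponding error. */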
case RTN_UNICAST: + case RTN_NAT: + policy = r; + break; + case RTN_UNREACHABLE: + return -ENETUNREACH; + default: + case RTN_BLACKHOLE: + return -EINVAL; + case RTN_PROHIBIT: + return -EACCES; + } + + if ((tb = fib_get_table(r->r_table)) == NULL) + continue; + err = tb->tb_lookup(tb, key, res); + if (err == 0) { +FRprintk("ok\n"); + res->r = policy; + return 0; + } + if (err < 0 && err != -EAGAIN) + return err; + } +FRprintk("FAILURE\n"); + return -ENETUNREACH; +} + +void fib_select_default(const struct rt_key *key, struct fib_result *res) +{ + if (res->r && res->r->r_action == RTN_UNICAST && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->r_table)) != NULL) + tb->tb_select_default(tb, key, res); + } +} + +static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + + if (event == NETDEV_UNREGISTER) + fib_rules_detach(dev); + else if (event == NETDEV_REGISTER) + fib_rules_attach(dev); + return NOTIFY_DONE; +} + + +struct notifier_block fib_rules_notifier = { + fib_rules_event, + NULL, + 0 +}; + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int inet_fill_rule(struct sk_buff *skb, + struct fib_rule *r, + struct netlink_callback *cb) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = r->r_dst_len; + rtm->rtm_src_len = r->r_src_len; + rtm->rtm_tos = r->r_tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + if (r->r_fwmark) + RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); +#endif + rtm->rtm_table = r->r_table; + rtm->rtm_protocol = 0; + rtm->rtm_scope = 0; + rtm->rtm_type = r->r_action; + rtm->rtm_flags = r->r_flags; + + if (r->r_dst_len) + RTA_PUT(skb, RTA_DST, 4, &r->r_dst); + if (r->r_src_len) + RTA_PUT(skb, RTA_SRC, 4, &r->r_src); + if (r->r_ifname[0]) + RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); + if (r->r_preference) + RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); + if (r->r_srcmap) + RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); +#ifdef CONFIG_NET_CLS_ROUTE + if (r->r_tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); +#endif + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->args[0]; + struct fib_rule *r; + + for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { + if (idx < s_idx) + continue; + if (inet_fill_rule(skb, r, cb) < 0) + break; + } + cb->args[0] = idx; + + return skb->len; +} + +#endif /* CONFIG_RTNETLINK */ + +__initfunc(void fib_rules_init(void)) +{ + register_netdevice_notifier(&fib_rules_notifier); +} diff --git a/pfinet/linux-src/net/ipv4/fib_semantics.c b/pfinet/linux-src/net/ipv4/fib_semantics.c new file mode 100644 index 00000000..b78f7eba --- /dev/null +++ b/pfinet/linux-src/net/ipv4/fib_semantics.c @@ -0,0 +1,991 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: semantics. 
+ * + * Version: $Id: fib_semantics.c,v 1.13 1999/03/21 05:22:34 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FSprintk(a...) + +static struct fib_info *fib_info_list; + +#define for_fib_info() { struct fib_info *fi; \ + for (fi = fib_info_list; fi; fi = fi->fib_next) + +#define endfor_fib_info() } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ +for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ +for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +/* Hope, that gcc will optimize it to get rid of dummy loop */ + +#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ +for (nhsel=0; nhsel < 1; nhsel++) + +#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ +for (nhsel=0; nhsel < 1; nhsel++) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define endfor_nexthops(fi) } + + +static struct +{ + int error; + u8 scope; +} fib_props[RTA_MAX+1] = { + { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */ + { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */ + { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */ + { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */ + { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */ + { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */ + { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */ + { -EAGAIN, RT_SCOPE_UNIVERSE}, /* RTN_THROW */ +#ifdef CONFIG_IP_ROUTE_NAT + { 0, RT_SCOPE_HOST}, /* RTN_NAT */ +#else + { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */ +#endif + { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */ +}; + +/* Release a nexthop info record */ + +void fib_release_info(struct fib_info *fi) +{ + if (fi && !--fi->fib_refcnt) { + if (fi->fib_next) + fi->fib_next->fib_prev = fi->fib_prev; + if (fi->fib_prev) + fi->fib_prev->fib_next = fi->fib_next; + if (fi == fib_info_list) + fib_info_list = fi->fib_next; + kfree(fi); + } +} + +extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) +{ + const struct fib_nh *onh = ofi->fib_nh; + + for_nexthops(fi) { + if (nh->nh_oif != onh->nh_oif || + nh->nh_gw != onh->nh_gw || + nh->nh_scope != onh->nh_scope || +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight != onh->nh_weight || +#endif +#ifdef CONFIG_NET_CLS_ROUTE + nh->nh_tclassid != onh->nh_tclassid || +#endif + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) + 
return -1; + onh++; + } endfor_nexthops(fi); + return 0; +} + +extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi) +{ + for_fib_info() { + if (fi->fib_nhs != nfi->fib_nhs) + continue; + if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_prefsrc == fi->fib_prefsrc && + nfi->fib_priority == fi->fib_priority && + nfi->fib_mtu == fi->fib_mtu && + nfi->fib_rtt == fi->fib_rtt && + nfi->fib_window == fi->fib_window && + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && + (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) + return fi; + } endfor_fib_info(); + return NULL; +} + +/* Check, that the gateway is already configured. + Used only by redirect accept routine. + */ + +int ip_fib_check_default(u32 gw, struct device *dev) +{ + for_fib_info() { + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for_nexthops(fi) { + if (nh->nh_dev == dev && nh->nh_gw == gw && + !(nh->nh_flags&RTNH_F_DEAD)) + return 0; + } endfor_nexthops(fi); + } endfor_fib_info(); + return -1; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) +{ + while (RTA_OK(attr,attrlen)) { + if (attr->rta_type == type) + return *(u32*)RTA_DATA(attr); + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +static int +fib_count_nexthops(struct rtattr *rta) +{ + int nhs = 0; + struct rtnexthop *nhp = RTA_DATA(rta); + int nhlen = RTA_PAYLOAD(rta); + + while (nhlen >= (int)sizeof(struct rtnexthop)) { + if ((nhlen -= nhp->rtnh_len) < 0) + return 0; + nhs++; + nhp = RTNH_NEXT(nhp); + }; + return nhs; +} + +static int +fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +{ + struct rtnexthop *nhp = RTA_DATA(rta); + int nhlen = RTA_PAYLOAD(rta); + + change_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; + nh->nh_oif = nhp->rtnh_ifindex; + nh->nh_weight = nhp->rtnh_hops + 1; + if (attrlen) { + nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +#ifdef CONFIG_NET_CLS_ROUTE + nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); +#endif + } + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); + return 0; +} + +#endif + +int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, + struct fib_info *fi) +{ +#ifdef CONFIG_IP_ROUTE_MULTIPATH + struct rtnexthop *nhp; + int nhlen; +#endif + + if (rta->rta_priority && + *rta->rta_priority != fi->fib_priority) + return 1; + + if (rta->rta_oif || rta->rta_gw) { + if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && + (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + return 0; + return 1; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (rta->rta_mp == NULL) + return 0; + nhp = RTA_DATA(rta->rta_mp); + nhlen = RTA_PAYLOAD(rta->rta_mp); + + for_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + u32 gw; + + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + return 1; + if (attrlen) { + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + if (gw && gw != nh->nh_gw) + return 1; +#ifdef CONFIG_NET_CLS_ROUTE + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + if (gw && gw != nh->nh_tclassid) + return 1; +#endif + } + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); +#endif + return 0; +} + + +/* + Picture + ------- + + Semantics of nexthop is very messy by historical reasons. 
+ We have to take into account, that: + a) gateway can be actually local interface address, + so that gatewayed route is direct. + b) gateway must be on-link address, possibly + described not by an ifaddr, but also by a direct route. + c) If both gateway and interface are specified, they should not + contradict. + d) If we use tunnel routes, gateway could be not on-link. + + Attempt to reconcile all of these (alas, self-contradictory) conditions + results in pretty ugly and hairy code with obscure logic. + + I choosed to generalized it instead, so that the size + of code does not increase practically, but it becomes + much more general. + Every prefix is assigned a "scope" value: "host" is local address, + "link" is direct route, + [ ... "site" ... "interior" ... ] + and "universe" is true gateway route with global meaning. + + Every prefix refers to a set of "nexthop"s (gw, oif), + where gw must have narrower scope. This recursion stops + when gw has LOCAL scope or if "nexthop" is declared ONLINK, + which means that gw is forced to be on link. + + Code is still hairy, but now it is apparently logically + consistent and very flexible. F.e. as by-product it allows + to co-exists in peace independent exterior and interior + routing processes. + + Normally it looks as following. + + {universe prefix} -> (gw, oif) [scope link] + | + |-> {link prefix} -> (gw, oif) [scope local] + | + |-> {local prefix} (terminal node) + */ + +static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +{ + int err; + + if (nh->nh_gw) { + struct rt_key key; + struct fib_result res; + +#ifdef CONFIG_IP_ROUTE_PERVASIVE + if (nh->nh_flags&RTNH_F_PERVASIVE) + return 0; +#endif + if (nh->nh_flags&RTNH_F_ONLINK) { + struct device *dev; + + if (r->rtm_scope >= RT_SCOPE_LINK) + return -EINVAL; + if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) + return -EINVAL; + if ((dev = dev_get_by_index(nh->nh_oif)) == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = dev; + nh->nh_scope = RT_SCOPE_LINK; + return 0; + } + memset(&key, 0, sizeof(key)); + key.dst = nh->nh_gw; + key.oif = nh->nh_oif; + key.scope = r->rtm_scope + 1; + + /* It is not necessary, but requires a bit of thinking */ + if (key.scope < RT_SCOPE_LINK) + key.scope = RT_SCOPE_LINK; + + if ((err = fib_lookup(&key, &res)) != 0) + return err; + nh->nh_scope = res.scope; + nh->nh_oif = FIB_RES_OIF(res); + nh->nh_dev = FIB_RES_DEV(res); + } else { + struct in_device *in_dev; + + if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + return -EINVAL; + + in_dev = inetdev_by_index(nh->nh_oif); + if (in_dev == NULL) + return -ENODEV; + if (!(in_dev->dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = in_dev->dev; + nh->nh_scope = RT_SCOPE_HOST; + } + return 0; +} + +struct fib_info * +fib_create_info(const struct rtmsg *r, struct kern_rta *rta, + const struct nlmsghdr *nlh, int *errp) +{ + int err; + struct fib_info *fi = NULL; + struct fib_info *ofi; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int nhs = 1; +#else + const int nhs = 1; +#endif + + /* Fast check to catch the most weird cases */ + if (fib_props[r->rtm_type].scope > r->rtm_scope) + goto err_inval; + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (rta->rta_mp) { + nhs = fib_count_nexthops(rta->rta_mp); + if (nhs == 0) + goto err_inval; + } +#endif + + fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + err = -ENOBUFS; + if (fi == NULL) + goto failure; + memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); + + fi->fib_protocol = r->rtm_protocol; + 
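+	/* The remaining generic attributes (flags, priority, metrics, preferred
+	   source) are copied next; the nexthop array itself is filled in further
+	   down, from RTA_MULTIPATH when present or from the single
+	   rta_oif/rta_gw pair otherwise. */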
fi->fib_nhs = nhs; + fi->fib_flags = r->rtm_flags; + if (rta->rta_priority) + fi->fib_priority = *rta->rta_priority; + if (rta->rta_mx) { + int attrlen = RTA_PAYLOAD(rta->rta_mx); + struct rtattr *attr = RTA_DATA(rta->rta_mx); + + while (RTA_OK(attr, attrlen)) { + unsigned flavor = attr->rta_type; + if (flavor) { + if (flavor > FIB_MAX_METRICS) + goto err_inval; + fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); + } + attr = RTA_NEXT(attr, attrlen); + } + } + if (rta->rta_prefsrc) + memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); + + if (rta->rta_mp) { +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) + goto failure; + if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + goto err_inval; + if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + goto err_inval; +#ifdef CONFIG_NET_CLS_ROUTE + if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + goto err_inval; +#endif +#else + goto err_inval; +#endif + } else { + struct fib_nh *nh = fi->fib_nh; + if (rta->rta_oif) + nh->nh_oif = *rta->rta_oif; + if (rta->rta_gw) + memcpy(&nh->nh_gw, rta->rta_gw, 4); +#ifdef CONFIG_NET_CLS_ROUTE + if (rta->rta_flow) + memcpy(&nh->nh_tclassid, rta->rta_flow, 4); +#endif + nh->nh_flags = r->rtm_flags; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = 1; +#endif + } + +#ifdef CONFIG_IP_ROUTE_NAT + if (r->rtm_type == RTN_NAT) { + if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + goto err_inval; + memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4); + goto link_it; + } +#endif + + if (fib_props[r->rtm_type].error) { + if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + goto err_inval; + goto link_it; + } + + if (r->rtm_scope > RT_SCOPE_HOST) + goto err_inval; + + if (r->rtm_scope == RT_SCOPE_HOST) { + struct fib_nh *nh = fi->fib_nh; + + /* Local address is added. 
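+		   Such a route must carry exactly one nexthop and no gateway;
+		   the nexthop device is taken directly from the interface index.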
*/ + if (nhs != 1 || nh->nh_gw) + goto err_inval; + nh->nh_scope = RT_SCOPE_NOWHERE; + nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + err = -ENODEV; + if (nh->nh_dev == NULL) + goto failure; + } else { + change_nexthops(fi) { + if ((err = fib_check_nh(r, fi, nh)) != 0) + goto failure; + } endfor_nexthops(fi) + } + + if (fi->fib_prefsrc) { + if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || + memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) + goto err_inval; + } + +link_it: + if ((ofi = fib_find_info(fi)) != NULL) { + kfree(fi); + ofi->fib_refcnt++; + return ofi; + } + + fi->fib_refcnt++; + fi->fib_next = fib_info_list; + fi->fib_prev = NULL; + if (fib_info_list) + fib_info_list->fib_prev = fi; + fib_info_list = fi; + return fi; + +err_inval: + err = -EINVAL; + +failure: + *errp = err; + if (fi) + kfree(fi); + return NULL; +} + +int +fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res) +{ + int err = fib_props[type].error; + + if (err == 0) { + if (fi->fib_flags&RTNH_F_DEAD) + return 1; + + res->fi = fi; + + switch (type) { +#ifdef CONFIG_IP_ROUTE_NAT + case RTN_NAT: + FIB_RES_RESET(*res); + return 0; +#endif + case RTN_UNICAST: + case RTN_LOCAL: + case RTN_BROADCAST: + case RTN_ANYCAST: + case RTN_MULTICAST: + for_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + continue; + if (!key->oif || key->oif == nh->nh_oif) + break; + } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (nhsel < fi->fib_nhs) { + res->nh_sel = nhsel; + return 0; + } +#else + if (nhsel < 1) + return 0; +#endif + endfor_nexthops(fi); + return 1; + default: + printk(KERN_DEBUG "impossible 102\n"); + return -EINVAL; + } + } + return err; +} + +/* Find appropriate source address to this destination */ + +u32 __fib_res_prefsrc(struct fib_result *res) +{ + return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); +} + +#ifdef CONFIG_RTNETLINK + +int +fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + struct fib_info *fi) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = dst_len; + rtm->rtm_src_len = 0; + rtm->rtm_tos = tos; + rtm->rtm_table = tb_id; + rtm->rtm_type = type; + rtm->rtm_flags = fi->fib_flags; + rtm->rtm_scope = scope; + if (rtm->rtm_dst_len) + RTA_PUT(skb, RTA_DST, 4, dst); + rtm->rtm_protocol = fi->fib_protocol; + if (fi->fib_priority) + RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); +#ifdef CONFIG_NET_CLS_ROUTE + if (fi->fib_nh[0].nh_tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); +#endif + if (fi->fib_mtu || fi->fib_window || fi->fib_rtt) { + int i; + struct rtattr *mx = (struct rtattr *)skb->tail; + RTA_PUT(skb, RTA_METRICS, 0, NULL); + for (i=0; i<FIB_MAX_METRICS; i++) { + if (fi->fib_metrics[i]) + RTA_PUT(skb, i+1, sizeof(unsigned), fi->fib_metrics + i); + } + mx->rta_len = skb->tail - (u8*)mx; + } + if (fi->fib_prefsrc) + RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + if (fi->fib_nhs == 1) { + if (fi->fib_nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) + RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fi->fib_nhs > 1) { + struct rtnexthop *nhp; + struct rtattr *mp_head; + if (skb_tailroom(skb) <= RTA_SPACE(0)) + goto rtattr_failure; + mp_head = 
(struct rtattr*)skb_put(skb, RTA_SPACE(0)); + + for_nexthops(fi) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = nh->nh_flags & 0xFF; + nhp->rtnh_hops = nh->nh_weight-1; + nhp->rtnh_ifindex = nh->nh_oif; + if (nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); + nhp->rtnh_len = skb->tail - (unsigned char*)nhp; + } endfor_nexthops(fi); + mp_head->rta_type = RTA_MULTIPATH; + mp_head->rta_len = skb->tail - (u8*)mp_head; + } +#endif + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#endif /* CONFIG_RTNETLINK */ + +#ifndef CONFIG_IP_NOSIOCRT + +int +fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r) +{ + int plen; + u32 *ptr; + + memset(rtm, 0, sizeof(*rtm)); + memset(rta, 0, sizeof(*rta)); + + if (r->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* Check mask for validity: + a) it must be contiguous. + b) destination must have all host bits clear. + c) if application forgot to set correct family (AF_INET), + reject request unless it is absolutely clear i.e. + both family and mask are zero. + */ + plen = 32; + ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; + if (!(r->rt_flags&RTF_HOST)) { + u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; + if (r->rt_genmask.sa_family != AF_INET) { + if (mask || r->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + if (bad_mask(mask, *ptr)) + return -EINVAL; + plen = inet_mask_len(mask); + } + + nl->nlmsg_flags = NLM_F_REQUEST; + nl->nlmsg_pid = 0; + nl->nlmsg_seq = 0; + nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); + if (cmd == SIOCDELRT) { + nl->nlmsg_type = RTM_DELROUTE; + nl->nlmsg_flags = 0; + } else { + nl->nlmsg_type = RTM_NEWROUTE; + nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; + rtm->rtm_protocol = RTPROT_BOOT; + } + + rtm->rtm_dst_len = plen; + rta->rta_dst = ptr; + + if (r->rt_metric) { + *(u32*)&r->rt_pad3 = r->rt_metric - 1; + rta->rta_priority = (u32*)&r->rt_pad3; + } + if (r->rt_flags&RTF_REJECT) { + rtm->rtm_scope = RT_SCOPE_HOST; + rtm->rtm_type = RTN_UNREACHABLE; + return 0; + } + rtm->rtm_scope = RT_SCOPE_NOWHERE; + rtm->rtm_type = RTN_UNICAST; + + if (r->rt_dev) { +#ifdef CONFIG_IP_ALIAS + char *colon; +#endif + struct device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + devname[IFNAMSIZ-1] = 0; +#ifdef CONFIG_IP_ALIAS + colon = strchr(devname, ':'); + if (colon) + *colon = 0; +#endif + dev = dev_get(devname); + if (!dev) + return -ENODEV; + rta->rta_oif = &dev->ifindex; +#ifdef CONFIG_IP_ALIAS + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = dev->ip_ptr; + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + rta->rta_prefsrc = &ifa->ifa_local; + } +#endif + } + + ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; + if (r->rt_gateway.sa_family == AF_INET && *ptr) { + rta->rta_gw = ptr; + if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) + return -EINVAL; + + if (rtm->rtm_scope == RT_SCOPE_NOWHERE) + rtm->rtm_scope = RT_SCOPE_LINK; + + if 
(r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { + struct rtattr *rec; + struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + rta->rta_mx = mx; + mx->rta_type = RTA_METRICS; + mx->rta_len = RTA_LENGTH(0); + if (r->rt_flags&RTF_MTU) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_MTU; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_mtu; + } + if (r->rt_flags&RTF_WINDOW) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_WINDOW; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_window; + } + if (r->rt_flags&RTF_IRTT) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_RTT; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_irtt; + } + } + return 0; +} + +#endif + +/* + Update FIB if: + - local address disappeared -> we must delete all the entries + referring to it. + - device went down -> we must shutdown all nexthops going via it. + */ + +int fib_sync_down(u32 local, struct device *dev, int force) +{ + int ret = 0; + int scope = RT_SCOPE_NOWHERE; + + if (force) + scope = -1; + + for_fib_info() { + if (local && fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } else if (dev && fi->fib_nhs) { + int dead = 0; + + change_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + dead++; + else if (nh->nh_dev == dev && + nh->nh_scope != scope) { + nh->nh_flags |= RTNH_F_DEAD; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fi->fib_power -= nh->nh_power; + nh->nh_power = 0; +#endif + dead++; + } + } endfor_nexthops(fi) + if (dead == fi->fib_nhs) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } + } + } endfor_fib_info(); + return ret; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* + Dead device goes up. We wake up dead nexthops. + It takes sense only on multipath routes. + */ + +int fib_sync_up(struct device *dev) +{ + int ret = 0; + + if (!(dev->flags&IFF_UP)) + return 0; + + for_fib_info() { + int alive = 0; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + alive++; + continue; + } + if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) + continue; + if (nh->nh_dev != dev || dev->ip_ptr == NULL) + continue; + alive++; + nh->nh_power = 0; + nh->nh_flags &= ~RTNH_F_DEAD; + } endfor_nexthops(fi) + + if (alive == fi->fib_nhs) { + fi->fib_flags &= ~RTNH_F_DEAD; + ret++; + } + } endfor_fib_info(); + return ret; +} + +/* + The algorithm is suboptimal, but it provides really + fair weighted route distribution. + */ + +void fib_select_multipath(const struct rt_key *key, struct fib_result *res) +{ + struct fib_info *fi = res->fi; + int w; + + if (fi->fib_power <= 0) { + int power = 0; + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + power += nh->nh_weight; + nh->nh_power = nh->nh_weight; + } + } endfor_nexthops(fi); + fi->fib_power = power; +#if 1 + if (power <= 0) { + printk(KERN_CRIT "impossible 777\n"); + return; + } +#endif + } + + + /* w should be random number [0..fi->fib_power-1], + it is pretty bad approximation. 
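+	   Each live nexthop holds nh_power credits, refilled to its configured
+	   weight whenever fib_power reaches zero; the selected nexthop gives up
+	   one credit, so across fib_power consecutive selections every nexthop
+	   is picked roughly in proportion to its weight.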
+ */ + + w = jiffies % fi->fib_power; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { + if ((w -= nh->nh_power) <= 0) { + nh->nh_power--; + fi->fib_power--; + res->nh_sel = nhsel; + return; + } + } + } endfor_nexthops(fi); + +#if 1 + printk(KERN_CRIT "impossible 888\n"); +#endif + return; +} +#endif + + +#ifdef CONFIG_PROC_FS + +static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi) +{ + static unsigned type2flags[RTN_MAX+1] = { + 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0 + }; + unsigned flags = type2flags[type]; + + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + if (!dead) + flags |= RTF_UP; + return flags; +} + +void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer) +{ + int len; + unsigned flags = fib_flag_trans(type, dead, mask, fi); + + if (fi) { + len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", prefix, + fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, + mask, fi->fib_mtu, fi->fib_window, fi->fib_rtt); + } else { + len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, + flags, 0, 0, 0, + mask, 0, 0, 0); + } + memset(buffer+len, ' ', 127-len); + buffer[127] = '\n'; +} + +#endif diff --git a/pfinet/linux-src/net/ipv4/icmp.c b/pfinet/linux-src/net/ipv4/icmp.c new file mode 100644 index 00000000..34b48a93 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/icmp.c @@ -0,0 +1,1155 @@ +/* + * NET3: Implementation of the ICMP protocol layer. + * + * Alan Cox, <alan@redhat.com> + * + * Version: $Id: icmp.c,v 1.52.2.2 1999/06/20 21:27:39 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Some of the function names and the icmp unreach table for this + * module were derived from [icmp.c 1.0.11 06/02/93] by + * Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. + * Other than that this module is a complete rewrite. + * + * Fixes: + * Mike Shaver : RFC1122 checks. + * Alan Cox : Multicast ping reply as self. + * Alan Cox : Fix atomicity lockup in ip_build_xmit + * call. + * Alan Cox : Added 216,128 byte paths to the MTU + * code. + * Martin Mares : RFC1812 checks. + * Martin Mares : Can be configured to follow redirects + * if acting as a router _without_ a + * routing protocol (RFC 1812). + * Martin Mares : Echo requests may be configured to + * be ignored (RFC 1812). + * Martin Mares : Limitation of ICMP error message + * transmit rate (RFC 1812). + * Martin Mares : TOS and Precedence set correctly + * (RFC 1812). + * Martin Mares : Now copying as much data from the + * original packet as we can without + * exceeding 576 bytes (RFC 1812). + * Willy Konynenberg : Transparent proxying support. + * Keith Owens : RFC1191 correction for 4.2BSD based + * path MTU bug. + * Thomas Quinot : ICMP Dest Unreach codes up to 15 are + * valid (RFC 1812). + * Andi Kleen : Check all packet lengths properly + * and moved all kfree_skb() up to + * icmp_rcv. + * Andi Kleen : Move the rate limit bookkeeping + * into the dest entry and use a token + * bucket filter (thanks to ANK). Make + * the rates sysctl configurable. 
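/*
 * Illustrative sketch, not from the original source: the weighted
 * next-hop draining scheme used by fib_select_multipath() above,
 * restated as a stand-alone user-space program.  The struct, the
 * function names and the use of rand() in place of the jiffies-based
 * draw are all inventions of this sketch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Simplified stand-in for one nexthop of a multipath route. */
struct nexthop {
	int weight;		/* configured relative weight       */
	int power;		/* remaining credits in this round  */
};

/*
 * When all credits are spent, refill every nexthop with its weight.
 * Then draw w in [0, total_power) and walk the list subtracting
 * credits until w drops to zero or below; the nexthop reached pays
 * one credit.  Over many calls each nexthop is selected roughly in
 * proportion to its weight.
 */
static int select_nexthop(struct nexthop *nh, int n, int *total_power)
{
	int i, w;

	if (*total_power <= 0) {
		*total_power = 0;
		for (i = 0; i < n; i++) {
			nh[i].power = nh[i].weight;
			*total_power += nh[i].weight;
		}
	}

	w = rand() % *total_power;
	for (i = 0; i < n; i++) {
		if (nh[i].power > 0 && (w -= nh[i].power) <= 0) {
			nh[i].power--;
			(*total_power)--;
			return i;
		}
	}
	return 0;	/* unreachable while the credit total stays consistent */
}

int main(void)
{
	struct nexthop nh[2] = { { 3, 0 }, { 1, 0 } };
	int hits[2] = { 0, 0 }, power = 0, i;

	srand((unsigned)time(NULL));
	for (i = 0; i < 40000; i++)
		hits[select_nexthop(nh, 2, &power)]++;

	/* Expect close to a 3:1 split between the two nexthops. */
	printf("nh0=%d nh1=%d\n", hits[0], hits[1]);
	return 0;
}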
+ * Yu Tianli : Fixed two ugly bugs in icmp_send + * - IP option length was accounted wrongly + * - ICMP header length was not accounted at all. + * Tristan Greaves : Added sysctl option to ignore bogus broadcast + * responses from broken routers. + * + * To Fix: + * + * - Should use skb_pull() instead of all the manual checking. + * This would also greatly simply some upper layer error handlers. --AK + * + * RFC1122 (Host Requirements -- Comm. Layer) Status: + * (boy, are there a lot of rules for ICMP) + * 3.2.2 (Generic ICMP stuff) + * MUST discard messages of unknown type. (OK) + * MUST copy at least the first 8 bytes from the offending packet + * when sending ICMP errors. (OBSOLETE -- see RFC1812) + * MUST pass received ICMP errors up to protocol level. (OK) + * SHOULD send ICMP errors with TOS == 0. (OBSOLETE -- see RFC1812) + * MUST NOT send ICMP errors in reply to: + * ICMP errors (OK) + * Broadcast/multicast datagrams (OK) + * MAC broadcasts (OK) + * Non-initial fragments (OK) + * Datagram with a source address that isn't a single host. (OK) + * 3.2.2.1 (Destination Unreachable) + * All the rules govern the IP layer, and are dealt with in ip.c, not here. + * 3.2.2.2 (Redirect) + * Host SHOULD NOT send ICMP_REDIRECTs. (OK) + * MUST update routing table in response to host or network redirects. + * (host OK, network OBSOLETE) + * SHOULD drop redirects if they're not from directly connected gateway + * (OK -- we drop it if it's not from our old gateway, which is close + * enough) + * 3.2.2.3 (Source Quench) + * MUST pass incoming SOURCE_QUENCHs to transport layer (OK) + * Other requirements are dealt with at the transport layer. + * 3.2.2.4 (Time Exceeded) + * MUST pass TIME_EXCEEDED to transport layer (OK) + * Other requirements dealt with at IP (generating TIME_EXCEEDED). + * 3.2.2.5 (Parameter Problem) + * SHOULD generate these (OK) + * MUST pass received PARAMPROBLEM to transport layer (NOT YET) + * [Solaris 2.X seems to assert EPROTO when this occurs] -- AC + * 3.2.2.6 (Echo Request/Reply) + * MUST reply to ECHO_REQUEST, and give app to do ECHO stuff (OK, OK) + * MAY discard broadcast ECHO_REQUESTs. (Configurable with a sysctl.) + * MUST reply using same source address as the request was sent to. + * We're OK for unicast ECHOs, and it doesn't say anything about + * how to handle broadcast ones, since it's optional. + * MUST copy data from REQUEST to REPLY (OK) + * unless it would require illegal fragmentation (OK) + * MUST pass REPLYs to transport/user layer (OK) + * MUST use any provided source route (reversed) for REPLY. (NOT YET) + * 3.2.2.7 (Information Request/Reply) + * MUST NOT implement this. (I guess that means silently discard...?) (OK) + * 3.2.2.8 (Timestamp Request/Reply) + * MAY implement (OK) + * SHOULD be in-kernel for "minimum variability" (OK) + * MAY discard broadcast REQUESTs. (OK, but see source for inconsistency) + * MUST reply using same source address as the request was sent to. (OK) + * MUST reverse source route, as per ECHO (NOT YET) + * MUST pass REPLYs to transport/user layer (requires RAW, just like + * ECHO) (OK) + * MUST update clock for timestamp at least 15 times/sec (OK) + * MUST be "correct within a few minutes" (OK) + * 3.2.2.9 (Address Mask Request/Reply) + * MAY implement (OK) + * MUST send a broadcast REQUEST if using this system to set netmask + * (OK... we don't use it) + * MUST discard received REPLYs if not using this system (OK) + * MUST NOT send replies unless specifically made agent for this sort + * of thing. 
(OK) + * + * + * RFC 1812 (IPv4 Router Requirements) Status (even longer): + * 4.3.2.1 (Unknown Message Types) + * MUST pass messages of unknown type to ICMP user iface or silently discard + * them (OK) + * 4.3.2.2 (ICMP Message TTL) + * MUST initialize TTL when originating an ICMP message (OK) + * 4.3.2.3 (Original Message Header) + * SHOULD copy as much data from the offending packet as possible without + * the length of the ICMP datagram exceeding 576 bytes (OK) + * MUST leave original IP header of the offending packet, but we're not + * required to undo modifications made (OK) + * 4.3.2.4 (Original Message Source Address) + * MUST use one of addresses for the interface the orig. packet arrived as + * source address (OK) + * 4.3.2.5 (TOS and Precedence) + * SHOULD leave TOS set to the same value unless the packet would be + * discarded for that reason (OK) + * MUST use TOS=0 if not possible to leave original value (OK) + * MUST leave IP Precedence for Source Quench messages (OK -- not sent + * at all) + * SHOULD use IP Precedence = 6 (Internetwork Control) or 7 (Network Control) + * for all other error messages (OK, we use 6) + * MAY allow configuration of IP Precedence (OK -- not done) + * MUST leave IP Precedence and TOS for reply messages (OK) + * 4.3.2.6 (Source Route) + * SHOULD use reverse source route UNLESS sending Parameter Problem on source + * routing and UNLESS the packet would be immediately discarded (NOT YET) + * 4.3.2.7 (When Not to Send ICMP Errors) + * MUST NOT send ICMP errors in reply to: + * ICMP errors (OK) + * Packets failing IP header validation tests unless otherwise noted (OK) + * Broadcast/multicast datagrams (OK) + * MAC broadcasts (OK) + * Non-initial fragments (OK) + * Datagram with a source address that isn't a single host. (OK) + * 4.3.2.8 (Rate Limiting) + * SHOULD be able to limit error message rate (OK) + * SHOULD allow setting of rate limits (OK, in the source) + * 4.3.3.1 (Destination Unreachable) + * All the rules govern the IP layer, and are dealt with in ip.c, not here. + * 4.3.3.2 (Redirect) + * MAY ignore ICMP Redirects if running a routing protocol or if forwarding + * is enabled on the interface (OK -- ignores) + * 4.3.3.3 (Source Quench) + * SHOULD NOT originate SQ messages (OK) + * MUST be able to limit SQ rate if originates them (OK as we don't + * send them) + * MAY ignore SQ messages it receives (OK -- we don't) + * 4.3.3.4 (Time Exceeded) + * Requirements dealt with at IP (generating TIME_EXCEEDED). + * 4.3.3.5 (Parameter Problem) + * MUST generate these for all errors not covered by other messages (OK) + * MUST include original value of the value pointed by (OK) + * 4.3.3.6 (Echo Request) + * MUST implement echo server function (OK) + * MUST process at ER of at least max(576, MTU) (OK) + * MAY reject broadcast/multicast ER's (We don't, but that's OK) + * SHOULD have a config option for silently ignoring ER's (OK) + * MUST have a default value for the above switch = NO (OK) + * MUST have application layer interface for Echo Request/Reply (OK) + * MUST reply using same source address as the request was sent to. + * We're OK for unicast ECHOs, and it doesn't say anything about + * how to handle broadcast ones, since it's optional. + * MUST copy data from Request to Reply (OK) + * SHOULD update Record Route / Timestamp options (??) 
+ * MUST use reversed Source Route for Reply if possible (NOT YET) + * 4.3.3.7 (Information Request/Reply) + * SHOULD NOT originate or respond to these (OK) + * 4.3.3.8 (Timestamp / Timestamp Reply) + * MAY implement (OK) + * MUST reply to every Timestamp message received (OK) + * MAY discard broadcast REQUESTs. (OK, but see source for inconsistency) + * MUST reply using same source address as the request was sent to. (OK) + * MUST use reversed Source Route if possible (NOT YET) + * SHOULD update Record Route / Timestamp options (??) + * MUST pass REPLYs to transport/user layer (requires RAW, just like + * ECHO) (OK) + * MUST update clock for timestamp at least 16 times/sec (OK) + * MUST be "correct within a few minutes" (OK) + * 4.3.3.9 (Address Mask Request/Reply) + * MUST have support for receiving AMRq and responding with AMRe (OK, + * but only as a compile-time option) + * SHOULD have option for each interface for AMRe's, MUST default to + * NO (NOT YET) + * MUST NOT reply to AMRq before knows the correct AM (OK) + * MUST NOT respond to AMRq with source address 0.0.0.0 on physical + * interfaces having multiple logical i-faces with different masks + * (NOT YET) + * SHOULD examine all AMRe's it receives and check them (NOT YET) + * SHOULD log invalid AMRe's (AM+sender) (NOT YET) + * MUST NOT use contents of AMRe to determine correct AM (OK) + * MAY broadcast AMRe's after having configured address masks (OK -- doesn't) + * MUST NOT do broadcast AMRe's if not set by extra option (OK, no option) + * MUST use the { <NetPrefix>, -1 } form of broadcast addresses (OK) + * 4.3.3.10 (Router Advertisement and Solicitations) + * MUST support router part of Router Discovery Protocol on all networks we + * support broadcast or multicast addressing. (OK -- done by gated) + * MUST have all config parameters with the respective defaults (OK) + * 5.2.7.1 (Destination Unreachable) + * MUST generate DU's (OK) + * SHOULD choose a best-match response code (OK) + * SHOULD NOT generate Host Isolated codes (OK) + * SHOULD use Communication Administratively Prohibited when administratively + * filtering packets (NOT YET -- bug-to-bug compatibility) + * MAY include config option for not generating the above and silently + * discard the packets instead (OK) + * MAY include config option for not generating Precedence Violation and + * Precedence Cutoff messages (OK as we don't generate them at all) + * MUST use Host Unreachable or Dest. Host Unknown codes whenever other hosts + * on the same network might be reachable (OK -- no net unreach's at all) + * MUST use new form of Fragmentation Needed and DF Set messages (OK) + * 5.2.7.2 (Redirect) + * MUST NOT generate network redirects (OK) + * MUST be able to generate host redirects (OK) + * SHOULD be able to generate Host+TOS redirects (NO as we don't use TOS) + * MUST have an option to use Host redirects instead of Host+TOS ones (OK as + * no Host+TOS Redirects are used) + * MUST NOT generate redirects unless forwarding to the same i-face and the + * dest. address is on the same subnet as the src. address and no source + * routing is in use. 
(OK) + * MUST NOT follow redirects when using a routing protocol (OK) + * MAY use redirects if not using a routing protocol (OK, compile-time option) + * MUST comply to Host Requirements when not acting as a router (OK) + * 5.2.7.3 (Time Exceeded) + * MUST generate Time Exceeded Code 0 when discarding packet due to TTL=0 (OK) + * MAY have a per-interface option to disable origination of TE messages, but + * it MUST default to "originate" (OK -- we don't support it) + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <net/snmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/raw.h> +#include <net/snmp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/init.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <net/checksum.h> + +#ifdef CONFIG_IP_MASQUERADE +#include <net/ip_masq.h> +#endif + +#define min(a,b) ((a)<(b)?(a):(b)) + +/* + * Statistics + */ + +struct icmp_mib icmp_statistics; + +/* An array of errno for error messages from dest unreach. */ +/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOS_UNREACH and SR_FAIELD MUST be considered 'transient errs'. */ + +struct icmp_err icmp_err_convert[] = { + { ENETUNREACH, 0 }, /* ICMP_NET_UNREACH */ + { EHOSTUNREACH, 0 }, /* ICMP_HOST_UNREACH */ + { ENOPROTOOPT, 1 }, /* ICMP_PROT_UNREACH */ + { ECONNREFUSED, 1 }, /* ICMP_PORT_UNREACH */ + { EMSGSIZE, 0 }, /* ICMP_FRAG_NEEDED */ + { EOPNOTSUPP, 0 }, /* ICMP_SR_FAILED */ + { ENETUNREACH, 1 }, /* ICMP_NET_UNKNOWN */ + { EHOSTDOWN, 1 }, /* ICMP_HOST_UNKNOWN */ + { ENONET, 1 }, /* ICMP_HOST_ISOLATED */ + { ENETUNREACH, 1 }, /* ICMP_NET_ANO */ + { EHOSTUNREACH, 1 }, /* ICMP_HOST_ANO */ + { ENETUNREACH, 0 }, /* ICMP_NET_UNR_TOS */ + { EHOSTUNREACH, 0 }, /* ICMP_HOST_UNR_TOS */ + { EHOSTUNREACH, 1 }, /* ICMP_PKT_FILTERED */ + { EHOSTUNREACH, 1 }, /* ICMP_PREC_VIOLATION */ + { EHOSTUNREACH, 1 } /* ICMP_PREC_CUTOFF */ +}; + +/* Control parameters for ECHO relies. */ +int sysctl_icmp_echo_ignore_all = 0; +int sysctl_icmp_echo_ignore_broadcasts = 0; + +/* Control parameter - ignore bogus broadcast responses? */ +int sysctl_icmp_ignore_bogus_error_responses =0; + +/* + * ICMP control array. This specifies what to do with each ICMP. + */ + +struct icmp_control +{ + unsigned long *output; /* Address to increment on output */ + unsigned long *input; /* Address to increment on input */ + void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len); + short error; /* This ICMP is classed as an error message */ + int *timeout; /* Rate limit */ +}; + +static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; + +/* + * Build xmit assembly blocks + */ + +struct icmp_bxm +{ + void *data_ptr; + int data_len; + struct icmphdr icmph; + unsigned long csum; + struct ip_options replyopts; + unsigned char optbuf[40]; +}; + +/* + * The ICMP socket. This is the most convenient way to flow control + * our ICMP output as well as maintain a clean interface throughout + * all layers. All Socketless IP sends will soon be gone. + */ + +struct inode icmp_inode; +struct socket *icmp_socket=&icmp_inode.u.socket_i; + +/* + * Send an ICMP frame. + */ + +/* + * Check transmit rate limitation for given message. 
+ * The rate information is held in the destination cache now. + * This function is generic and could be used for other purposes + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. + * + * Note that the same dst_entry fields are modified by functions in + * route.c too, but these work for packet destinations while xrlim_allow + * works for icmp destinations. This means the rate limiting information + * for one "ip object" is shared. + * + * Note that the same dst_entry fields are modified by functions in + * route.c too, but these work for packet destinations while xrlim_allow + * works for icmp destinations. This means the rate limiting information + * for one "ip object" is shared - and these ICMPs are twice limited: + * by source and by destination. + * + * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate + * SHOULD allow setting of rate limits + * + * Shared between ICMPv4 and ICMPv6. + */ +#define XRLIM_BURST_FACTOR 6 +int xrlim_allow(struct dst_entry *dst, int timeout) +{ + unsigned long now; + + now = jiffies; + dst->rate_tokens += now - dst->rate_last; + dst->rate_last = now; + if (dst->rate_tokens > XRLIM_BURST_FACTOR*timeout) + dst->rate_tokens = XRLIM_BURST_FACTOR*timeout; + if (dst->rate_tokens >= timeout) { + dst->rate_tokens -= timeout; + return 1; + } + return 0; +} + +static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) +{ + struct dst_entry *dst = &rt->u.dst; + + if (type > NR_ICMP_TYPES || !icmp_pointers[type].timeout) + return 1; + + /* Don't limit PMTU discovery. */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + return 1; + + /* Redirect has its own rate limit mechanism */ + if (type == ICMP_REDIRECT) + return 1; + + /* No rate limit on loopback */ + if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) + return 1; + + return xrlim_allow(dst, *(icmp_pointers[type].timeout)); +} + +/* + * Maintain the counters used in the SNMP statistics for outgoing ICMP + */ + +static void icmp_out_count(int type) +{ + if (type>NR_ICMP_TYPES) + return; + (*icmp_pointers[type].output)++; + icmp_statistics.IcmpOutMsgs++; +} + +/* + * Checksum each fragment, and on the first include the headers and final checksum. + */ + +static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned int fraglen) +{ + struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; + struct icmphdr *icmph; + unsigned long csum; + + if (offset) { + icmp_param->csum=csum_partial_copy(icmp_param->data_ptr+offset-sizeof(struct icmphdr), + to, fraglen,icmp_param->csum); + return 0; + } + + /* + * First fragment includes header. Note that we've done + * the other fragments first, so that we get the checksum + * for the whole packet here. + */ + csum = csum_partial_copy((void *)&icmp_param->icmph, + to, sizeof(struct icmphdr), + icmp_param->csum); + csum = csum_partial_copy(icmp_param->data_ptr, + to+sizeof(struct icmphdr), + fraglen-sizeof(struct icmphdr), csum); + icmph=(struct icmphdr *)to; + icmph->checksum = csum_fold(csum); + return 0; +} + +/* + * Driving logic for building and sending ICMP messages. 
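/*
 * Illustrative sketch, not from the original source: the token bucket
 * check performed by xrlim_allow() above, restated in user-space C.
 * The struct ratelimit container and the caller-supplied tick counter
 * are inventions of this sketch; in the kernel the two fields live in
 * the destination cache entry and the clock is jiffies.
 */
#include <stdio.h>

struct ratelimit {
	unsigned long tokens;	/* accumulated credit, in ticks      */
	unsigned long last;	/* tick count at the previous check  */
};

#define BURST_FACTOR 6

/*
 * Credit grows with elapsed time, is capped at BURST_FACTOR*timeout,
 * and each message sent costs `timeout' ticks.  Returns 1 if sending
 * is allowed now, 0 otherwise.
 */
static int rate_allow(struct ratelimit *rl, unsigned long now,
		      unsigned long timeout)
{
	rl->tokens += now - rl->last;
	rl->last = now;
	if (rl->tokens > BURST_FACTOR * timeout)
		rl->tokens = BURST_FACTOR * timeout;
	if (rl->tokens >= timeout) {
		rl->tokens -= timeout;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct ratelimit rl = { 0, 0 };
	unsigned long t;

	/* One attempt per tick, four ticks of credit per message: the
	 * first send happens at t=4 and then every fourth tick. */
	for (t = 0; t < 20; t++)
		printf("t=%2lu %s\n", t, rate_allow(&rl, t, 4) ? "send" : "drop");
	return 0;
}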
+ */ + +static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) +{ + struct sock *sk=icmp_socket->sk; + struct ipcm_cookie ipc; + struct rtable *rt = (struct rtable*)skb->dst; + u32 daddr; + + if (ip_options_echo(&icmp_param->replyopts, skb)) + return; + + icmp_param->icmph.checksum=0; + icmp_param->csum=0; + icmp_out_count(icmp_param->icmph.type); + + sk->ip_tos = skb->nh.iph->tos; + daddr = ipc.addr = rt->rt_src; + ipc.opt = &icmp_param->replyopts; + if (ipc.opt->srr) + daddr = icmp_param->replyopts.faddr; + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) + return; + ip_build_xmit(sk, icmp_glue_bits, icmp_param, + icmp_param->data_len+sizeof(struct icmphdr), + &ipc, rt, MSG_DONTWAIT); + ip_rt_put(rt); +} + + +/* + * Send an ICMP message in response to a situation + * + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * MUST NOT change this header information. + * MUST NOT reply to a multicast/broadcast IP address. + * MUST NOT reply to a multicast/broadcast MAC address. + * MUST reply to only the first fragment. + */ + +void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info) +{ + struct iphdr *iph; + struct icmphdr *icmph; + int room; + struct icmp_bxm icmp_param; + struct rtable *rt = (struct rtable*)skb_in->dst; + struct ipcm_cookie ipc; + u32 saddr; + u8 tos; + + /* + * Find the original header + */ + + iph = skb_in->nh.iph; + + /* + * No replies to physical multicast/broadcast + */ + + if (skb_in->pkt_type!=PACKET_HOST) + return; + + /* + * Now check at the protocol level + */ + if (!rt) { +#ifndef CONFIG_IP_ALWAYS_DEFRAG + if (net_ratelimit()) + printk(KERN_DEBUG "icmp_send: destinationless packet\n"); +#endif + return; + } + if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) + return; + + + /* + * Only reply to fragment 0. We byte re-order the constant + * mask for efficiency. + */ + + if (iph->frag_off&htons(IP_OFFSET)) + return; + + /* + * If we send an ICMP error to an ICMP error a mess would result.. + */ + + if (icmp_pointers[type].error) { + /* + * We are an error, check if we are replying to an ICMP error + */ + + if (iph->protocol==IPPROTO_ICMP) { + icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); + /* + * Assume any unknown ICMP type is an error. This isn't + * specified by the RFC, but think about it.. + */ + if (icmph->type>NR_ICMP_TYPES || icmp_pointers[icmph->type].error) + return; + } + } + + + /* + * Construct source address and options. + */ + +#ifdef CONFIG_IP_ROUTE_NAT + /* + * Restore original addresses if packet has been translated. + */ + if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) { + iph->daddr = rt->key.dst; + iph->saddr = rt->key.src; + } +#endif +#ifdef CONFIG_IP_MASQUERADE + if (type==ICMP_DEST_UNREACH && IPCB(skb_in)->flags&IPSKB_MASQUERADED) { + ip_fw_unmasq_icmp(skb_in); + } +#endif + + saddr = iph->daddr; + if (!(rt->rt_flags & RTCF_LOCAL)) + saddr = 0; + + tos = icmp_pointers[type].error ? + ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : + iph->tos; + + /* XXX: use a more aggressive expire for routes created by + * this call (not longer than the rate limit timeout). + * It could be also worthwhile to not put them into ipv4 + * fast routing cache at first. Otherwise an attacker can + * grow the routing table. 
+ */ + if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) + return; + + if (ip_options_echo(&icmp_param.replyopts, skb_in)) + goto ende; + + + /* + * Prepare data for ICMP header. + */ + + icmp_param.icmph.type=type; + icmp_param.icmph.code=code; + icmp_param.icmph.un.gateway = info; + icmp_param.icmph.checksum=0; + icmp_param.csum=0; + icmp_param.data_ptr=iph; + icmp_out_count(icmp_param.icmph.type); + icmp_socket->sk->ip_tos = tos; + ipc.addr = iph->saddr; + ipc.opt = &icmp_param.replyopts; + if (icmp_param.replyopts.srr) { + ip_rt_put(rt); + if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) + return; + } + + if (!icmpv4_xrlim_allow(rt, type, code)) + goto ende; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + + room = rt->u.dst.pmtu; + if (room > 576) + room = 576; + room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; + room -= sizeof(struct icmphdr); + + icmp_param.data_len=(iph->ihl<<2)+skb_in->len; + if (icmp_param.data_len > room) + icmp_param.data_len = room; + + ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, + icmp_param.data_len+sizeof(struct icmphdr), + &ipc, rt, MSG_DONTWAIT); + +ende: + ip_rt_put(rt); +} + + +/* + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + */ + +static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ + struct iphdr *iph; + int hash; + struct inet_protocol *ipprot; + unsigned char *dp; + struct sock *raw_sk; + + /* + * Incomplete header ? + * Only checks for the IP header, there should be an + * additional check for longer headers in upper levels. + */ + + if(len<sizeof(struct iphdr)) { + icmp_statistics.IcmpInErrors++; + return; + } + + iph = (struct iphdr *) (icmph + 1); + dp = (unsigned char*)iph; + + if(icmph->type==ICMP_DEST_UNREACH) { + switch(icmph->code & 15) { + case ICMP_NET_UNREACH: + break; + case ICMP_HOST_UNREACH: + break; + case ICMP_PROT_UNREACH: + break; + case ICMP_PORT_UNREACH: + break; + case ICMP_FRAG_NEEDED: + if (ipv4_config.no_pmtu_disc) { + if (net_ratelimit()) + printk(KERN_INFO "ICMP: %d.%d.%d.%d: fragmentation needed and DF set.\n", + NIPQUAD(iph->daddr)); + } else { + unsigned short new_mtu; + new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); + if (!new_mtu) + return; + icmph->un.frag.mtu = htons(new_mtu); + } + break; + case ICMP_SR_FAILED: + if (net_ratelimit()) + printk(KERN_INFO "ICMP: %d.%d.%d.%d: Source Route Failed.\n", NIPQUAD(iph->daddr)); + break; + default: + break; + } + if (icmph->code>NR_ICMP_UNREACH) + return; + } + + /* + * Throw it at our lower layers + * + * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed header. + * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the transport layer. + * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to transport layer. + */ + + /* + * Check the other end isnt violating RFC 1122. Some routers send + * bogus responses to broadcast frames. If you see this message + * first check your netmask matches at both ends, if it does then + * get the other vendor to fix their kit. + */ + + if (!sysctl_icmp_ignore_bogus_error_responses) + { + + if (inet_addr_type(iph->daddr) == RTN_BROADCAST) + { + if (net_ratelimit()) + printk(KERN_WARNING "%d.%d.%d.%d sent an invalid ICMP error to a broadcast.\n", + NIPQUAD(skb->nh.iph->saddr)); + return; + } + } + + /* + * Deliver ICMP message to raw sockets. Pretty useless feature? 
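/*
 * Illustrative sketch, not from the original source: the arithmetic
 * used by icmp_send() above to decide how much of the offending
 * datagram may be quoted, following the RFC 1812 rule that the ICMP
 * error must not exceed 576 bytes (or the path MTU, if smaller).
 * The function and constant names are inventions of this sketch and
 * assume a plain 20-byte IP header on the error datagram.
 */
#include <stdio.h>

#define IP_HDR_LEN	20	/* header of the ICMP error datagram itself */
#define ICMP_HDR_LEN	8

static int icmp_quote_len(int pmtu, int reply_optlen, int offending_len)
{
	int room = pmtu;

	if (room > 576)
		room = 576;
	room -= IP_HDR_LEN + reply_optlen;
	room -= ICMP_HDR_LEN;

	return offending_len < room ? offending_len : room;
}

int main(void)
{
	/* A 1400-byte offender over a 1500-byte path: quote 548 bytes. */
	printf("%d\n", icmp_quote_len(1500, 0, 1400));
	/* A 60-byte offender is quoted in full. */
	printf("%d\n", icmp_quote_len(1500, 0, 60));
	return 0;
}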
+ */ + + /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ + hash = iph->protocol & (MAX_INET_PROTOS - 1); + if ((raw_sk = raw_v4_htable[hash]) != NULL) + { + while ((raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, + iph->daddr, skb->dev->ifindex)) != NULL) { + raw_err(raw_sk, skb); + raw_sk = raw_sk->next; + } + } + + /* + * This can't change while we are doing it. + */ + + ipprot = (struct inet_protocol *) inet_protos[hash]; + while(ipprot != NULL) { + struct inet_protocol *nextip; + + nextip = (struct inet_protocol *) ipprot->next; + + /* + * Pass it off to everyone who wants it. + */ + + /* RFC1122: OK. Passes appropriate ICMP errors to the */ + /* appropriate protocol layer (MUST), as per 3.2.2. */ + + if (iph->protocol == ipprot->protocol && ipprot->err_handler) + ipprot->err_handler(skb, dp, len); + + ipprot = nextip; + } +} + + +/* + * Handle ICMP_REDIRECT. + */ + +static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ + struct iphdr *iph; + unsigned long ip; + + if (len < sizeof(struct iphdr)) { + icmp_statistics.IcmpInErrors++; + return; + } + + /* + * Get the copied header of the packet that caused the redirect + */ + + iph = (struct iphdr *) (icmph + 1); + ip = iph->daddr; + + switch(icmph->code & 7) { + case ICMP_REDIR_NET: + case ICMP_REDIR_NETTOS: + /* + * As per RFC recommendations now handle it as + * a host redirect. + */ + + case ICMP_REDIR_HOST: + case ICMP_REDIR_HOSTTOS: + ip_rt_redirect(skb->nh.iph->saddr, ip, icmph->un.gateway, iph->saddr, iph->tos, skb->dev); + break; + default: + break; + } +} + +/* + * Handle ICMP_ECHO ("ping") requests. + * + * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo requests. + * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be included in the reply. + * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT. + * See also WRT handling of options once they are done and working. + */ + +static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ + if (!sysctl_icmp_echo_ignore_all) { + struct icmp_bxm icmp_param; + + icmp_param.icmph=*icmph; + icmp_param.icmph.type=ICMP_ECHOREPLY; + icmp_param.data_ptr=(icmph+1); + icmp_param.data_len=len; + icmp_reply(&icmp_param, skb); + } +} + +/* + * Handle ICMP Timestamp requests. + * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. + * SHOULD be in the kernel for minimum random latency. + * MUST be accurate to a few minutes. + * MUST be updated at least at 15Hz. + */ + +static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ + struct timeval tv; + __u32 times[3]; /* So the new timestamp works on ALPHA's.. */ + struct icmp_bxm icmp_param; + + /* + * Too short. + */ + + if(len<12) { + icmp_statistics.IcmpInErrors++; + return; + } + + /* + * Fill in the current time as ms since midnight UT: + */ + + do_gettimeofday(&tv); + times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); + times[2] = times[1]; + memcpy((void *)×[0], icmph+1, 4); /* Incoming stamp */ + icmp_param.icmph=*icmph; + icmp_param.icmph.type=ICMP_TIMESTAMPREPLY; + icmp_param.icmph.code=0; + icmp_param.data_ptr=× + icmp_param.data_len=12; + icmp_reply(&icmp_param, skb); +} + + +/* + * Handle ICMP_ADDRESS_MASK requests. (RFC950) + * + * RFC1122 (3.2.2.9). A host MUST only send replies to + * ADDRESS_MASK requests if it's been configured as an address mask + * agent. 
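/*
 * Illustrative sketch, not from the original source: the stamp format
 * produced by icmp_timestamp() above -- milliseconds since midnight UT
 * in network byte order.  clock_gettime(CLOCK_REALTIME) stands in for
 * the kernel's do_gettimeofday(); the function name is an invention of
 * this sketch.
 */
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <arpa/inet.h>		/* htonl(), ntohl() */

static uint32_t icmp_timestamp_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return htonl((uint32_t)((ts.tv_sec % 86400) * 1000
				+ ts.tv_nsec / 1000000));
}

int main(void)
{
	uint32_t stamp = icmp_timestamp_now();

	printf("receive/transmit stamp: %u ms since midnight UT\n",
	       (unsigned)ntohl(stamp));
	return 0;
}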
Receiving a request doesn't constitute implicit permission to + * act as one. Of course, implementing this correctly requires (SHOULD) + * a way to turn the functionality on and off. Another one for sysctl(), + * I guess. -- MS + * + * RFC1812 (4.3.3.9). A router MUST implement it. + * A router SHOULD have switch turning it on/off. + * This switch MUST be ON by default. + * + * Gratuitous replies, zero-source replies are not implemented, + * that complies with RFC. DO NOT implement them!!! All the idea + * of broadcast addrmask replies as specified in RFC950 is broken. + * The problem is that it is not uncommon to have several prefixes + * on one physical interface. Moreover, addrmask agent can even be + * not aware of existing another prefixes. + * If source is zero, addrmask agent cannot choose correct prefix. + * Gratuitous mask announcements suffer from the same problem. + * RFC1812 explains it, but still allows to use ADDRMASK, + * that is pretty silly. --ANK + * + * All these rules are so bizarre, that I removed kernel addrmask + * support at all. It is wrong, it is obsolete, nobody uses it in + * any case. --ANK + * + * Furthermore you can do it with a usermode address agent program + * anyway... + */ + +static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ +#if 0 + if (net_ratelimit()) + printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); +#endif +} + +/* + * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain + * loudly if an inconsistency is found. + */ + +static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ + struct rtable *rt = (struct rtable*)skb->dst; + struct device *dev = skb->dev; + struct in_device *in_dev = dev->ip_ptr; + struct in_ifaddr *ifa; + u32 mask; + + if (!in_dev || !in_dev->ifa_list || + !IN_DEV_LOG_MARTIANS(in_dev) || + !IN_DEV_FORWARD(in_dev) || + len < 4 || + !(rt->rt_flags&RTCF_DIRECTSRC)) + return; + + mask = *(u32*)&icmph[1]; + for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) + return; + } + if (net_ratelimit()) + printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n", + ntohl(mask), ntohl(rt->rt_src), dev->name); +} + +static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len) +{ +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* + * Check incoming icmp packets not addressed locally, to check whether + * they relate to a (proxying) socket on our system. + * Needed for transparent proxying. + * + * This code is presently ugly and needs cleanup. + * Probably should add a chkaddr entry to ipprot to call a chk routine + * in udp.c or tcp.c... + */ + +/* This should work with the new hashes now. 
-DaveM */ +extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); +extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); + +int icmp_chkaddr(struct sk_buff *skb) +{ + struct icmphdr *icmph=(struct icmphdr *)(skb->nh.raw + skb->nh.iph->ihl*4); + struct iphdr *iph = (struct iphdr *) (icmph + 1); + void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len) = icmp_pointers[icmph->type].handler; + + if (handler == icmp_unreach || handler == icmp_redirect) { + struct sock *sk; + + switch (iph->protocol) { + case IPPROTO_TCP: + { + struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); + + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex); + if (!sk || (sk->state == TCP_LISTEN)) + return 0; + /* + * This packet came from us. + */ + return 1; + } + case IPPROTO_UDP: + { + struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); + + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + if (!sk) return 0; + if (sk->saddr != iph->saddr && inet_addr_type(iph->saddr) != RTN_LOCAL) + return 0; + /* + * This packet may have come from us. + * Assume it did. + */ + return 1; + } + } + } + return 0; +} + +#endif + +/* + * Deal with incoming ICMP packets. + */ + +int icmp_rcv(struct sk_buff *skb, unsigned short len) +{ + struct icmphdr *icmph = skb->h.icmph; + struct rtable *rt = (struct rtable*)skb->dst; + + icmp_statistics.IcmpInMsgs++; + + /* + * 18 is the highest 'known' ICMP type. Anything else is a mystery + * + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded. + */ + if(len < sizeof(struct icmphdr) || + ip_compute_csum((unsigned char *) icmph, len) || + icmph->type > NR_ICMP_TYPES) + goto error; + + /* + * Parse the ICMP message + */ + + if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { + /* + * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be + * silently ignored (we let user decide with a sysctl). + * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently + * discarded if to broadcast/multicast. + */ + if (icmph->type == ICMP_ECHO && + sysctl_icmp_echo_ignore_broadcasts) { + goto error; + } + if (icmph->type != ICMP_ECHO && + icmph->type != ICMP_TIMESTAMP && + icmph->type != ICMP_ADDRESS && + icmph->type != ICMP_ADDRESSREPLY) { + goto error; + } + } + + len -= sizeof(struct icmphdr); + (*icmp_pointers[icmph->type].input)++; + (icmp_pointers[icmph->type].handler)(icmph, skb, len); + +drop: + kfree_skb(skb); + return 0; +error: + icmp_statistics.IcmpInErrors++; + goto drop; +} + +/* + * A spare long used to speed up statistics updating + */ + +static unsigned long dummy; + +/* + * Configurable rate limits. + * Someone should check if these default values are correct. + * Note that these values interact with the routing cache GC timeout. + * If you chose them too high they won't take effect, because the + * dst_entry gets expired too early. The same should happen when + * the cache grows too big. + */ +int sysctl_icmp_destunreach_time = 1*HZ; +int sysctl_icmp_timeexceed_time = 1*HZ; +int sysctl_icmp_paramprob_time = 1*HZ; +int sysctl_icmp_echoreply_time = 0; /* don't limit it per default. */ + +/* + * This table is the definition of how we handle ICMP. 
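/*
 * Illustrative sketch, not from the original source: a portable RFC
 * 1071 style one's-complement checksum, equivalent in spirit to the
 * ip_compute_csum() verification done by icmp_rcv() above, but not the
 * kernel's optimized implementation.  The function name is an
 * invention of this sketch.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static uint16_t inet_checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)				/* trailing odd byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)			/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* ICMP echo request header with the checksum field zeroed. */
	uint8_t msg[8] = { 8, 0, 0x00, 0x00, 0x12, 0x34, 0x00, 0x01 };
	uint16_t csum = inet_checksum(msg, sizeof(msg));

	msg[2] = csum >> 8;			/* store big-endian */
	msg[3] = csum & 0xff;
	printf("checksum %04x, verify %04x (0 means intact)\n",
	       csum, inet_checksum(msg, sizeof(msg)));
	return 0;
}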
+ */ + +static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { +/* ECHO REPLY (0) */ + { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, &sysctl_icmp_echoreply_time}, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, +/* DEST UNREACH (3) */ + { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &sysctl_icmp_destunreach_time }, +/* SOURCE QUENCH (4) */ + { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, }, +/* REDIRECT (5) */ + { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, +/* ECHO (8) */ + { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, +/* TIME EXCEEDED (11) */ + { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &sysctl_icmp_timeexceed_time }, +/* PARAMETER PROBLEM (12) */ + { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_unreach, 1, &sysctl_icmp_paramprob_time }, +/* TIMESTAMP (13) */ + { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, }, +/* TIMESTAMP REPLY (14) */ + { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, }, +/* INFO (15) */ + { &dummy, &dummy, icmp_discard, 0, }, +/* INFO REPLY (16) */ + { &dummy, &dummy, icmp_discard, 0, }, +/* ADDR MASK (17) */ + { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, }, +/* ADDR MASK REPLY (18) */ + { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, } +}; + +__initfunc(void icmp_init(struct net_proto_family *ops)) +{ + int err; + + icmp_inode.i_mode = S_IFSOCK; + icmp_inode.i_sock = 1; + icmp_inode.i_uid = 0; + icmp_inode.i_gid = 0; + + icmp_socket->inode = &icmp_inode; + icmp_socket->state = SS_UNCONNECTED; + icmp_socket->type=SOCK_RAW; + + if ((err=ops->create(icmp_socket, IPPROTO_ICMP))<0) + panic("Failed to create the ICMP control socket.\n"); + icmp_socket->sk->allocation=GFP_ATOMIC; + icmp_socket->sk->num = 256; /* Don't receive any data */ + icmp_socket->sk->ip_ttl = MAXTTL; +} diff --git a/pfinet/linux-src/net/ipv4/igmp.c b/pfinet/linux-src/net/ipv4/igmp.c new file mode 100644 index 00000000..934e8601 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/igmp.c @@ -0,0 +1,698 @@ +/* + * Linux NET3: Internet Group Management Protocol [IGMP] + * + * This code implements the IGMP protocol as defined in RFC1112. There has + * been a further revision of this protocol since which is now supported. + * + * If you have trouble with this module be careful what gcc you have used, + * the older version didn't come out right using gcc 2.5.8, the newer one + * seems to fall out with gcc 2.6.2. + * + * Version: $Id: igmp.c,v 1.30.2.1 1999/07/23 15:29:22 davem Exp $ + * + * Authors: + * Alan Cox <Alan.Cox@linux.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
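/*
 * Illustrative sketch, not from the original source: the pattern
 * behind the icmp_pointers[] table above -- a handler table indexed
 * directly by message type, with a range check before indexing so
 * unknown types are silently discarded.  The types, names and the
 * three trimmed entries are inventions of this sketch.
 */
#include <stdio.h>

#define NR_TYPES 2	/* highest known message type in this sketch */

struct msg_control {
	void (*handler)(int type);
	int error;		/* 1 if this type is an error message */
};

static void handle_echo(int type)    { printf("echo, type %d\n", type); }
static void handle_discard(int type) { printf("discard, type %d\n", type); }

/* Indexed directly by message type; every slot must be filled. */
static const struct msg_control pointers[NR_TYPES + 1] = {
	{ handle_discard, 0 },		/* 0: echo reply */
	{ handle_discard, 1 },		/* 1: reserved   */
	{ handle_echo,    0 },		/* 2: echo       */
};

static void rcv(int type)
{
	if (type < 0 || type > NR_TYPES)	/* unknown: drop silently */
		return;
	pointers[type].handler(type);
}

int main(void)
{
	rcv(2);		/* dispatched to handle_echo */
	rcv(7);		/* out of range, discarded   */
	return 0;
}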
+ * + * Fixes: + * + * Alan Cox : Added lots of __inline__ to optimise + * the memory usage of all the tiny little + * functions. + * Alan Cox : Dumped the header building experiment. + * Alan Cox : Minor tweaks ready for multicast routing + * and extended IGMP protocol. + * Alan Cox : Removed a load of inline directives. Gcc 2.5.8 + * writes utterly bogus code otherwise (sigh) + * fixed IGMP loopback to behave in the manner + * desired by mrouted, fixed the fact it has been + * broken since 1.3.6 and cleaned up a few minor + * points. + * + * Chih-Jen Chang : Tried to revise IGMP to Version 2 + * Tsu-Sheng Tsao E-mail: chihjenc@scf.usc.edu and tsusheng@scf.usc.edu + * The enhancements are mainly based on Steve Deering's + * ipmulti-3.5 source code. + * Chih-Jen Chang : Added the igmp_get_mrouter_info and + * Tsu-Sheng Tsao igmp_set_mrouter_info to keep track of + * the mrouted version on that device. + * Chih-Jen Chang : Added the max_resp_time parameter to + * Tsu-Sheng Tsao igmp_heard_query(). Using this parameter + * to identify the multicast router version + * and do what the IGMP version 2 specified. + * Chih-Jen Chang : Added a timer to revert to IGMP V2 router + * Tsu-Sheng Tsao if the specified time expired. + * Alan Cox : Stop IGMP from 0.0.0.0 being accepted. + * Alan Cox : Use GFP_ATOMIC in the right places. + * Christian Daudt : igmp timer wasn't set for local group + * memberships but was being deleted, + * which caused a "del_timer() called + * from %p with timer not initialized\n" + * message (960131). + * Christian Daudt : removed del_timer from + * igmp_timer_expire function (960205). + * Christian Daudt : igmp_heard_report now only calls + * igmp_timer_expire if tm->running is + * true (960216). + * Malcolm Beattie : ttl comparison wrong in igmp_rcv made + * igmp_heard_query never trigger. Expiry + * miscalculation fixed in igmp_heard_query + * and random() made to return unsigned to + * prevent negative expiry times. + * Alexey Kuznetsov: Wrong group leaving behaviour, backport + * fix from pending 2.1.x patches. + * Alan Cox: Forget to enable FDDI support earlier. + * Alexey Kuznetsov: Fixed leaving groups on device down. + * Alexey Kuznetsov: Accordance to igmp-v2-06 draft. + */ + + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/if_arp.h> +#include <linux/rtnetlink.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/checksum.h> +#ifdef CONFIG_IP_MROUTE +#include <linux/mroute.h> +#endif + +#define IP_MAX_MEMBERSHIPS 20 + +#ifdef CONFIG_IP_MULTICAST + +/* Parameter names and values are taken from igmp-v2-06 draft */ + +#define IGMP_V1_Router_Present_Timeout (400*HZ) +#define IGMP_Unsolicited_Report_Interval (10*HZ) +#define IGMP_Query_Response_Interval (10*HZ) +#define IGMP_Unsolicited_Report_Count 2 + + +#define IGMP_Initial_Report_Delay (1*HZ) + +/* IGMP_Initial_Report_Delay is not from IGMP specs! + * IGMP specs require to report membership immediately after + * joining a group, but we delay the first report by a + * small interval. It seems more natural and still does not + * contradict to specs provided this delay is small enough. 
+ */ + +#define IGMP_V1_SEEN(in_dev) ((in_dev)->mr_v1_seen && (long)(jiffies - (in_dev)->mr_v1_seen) < 0) + +/* + * Timer management + */ + +static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +{ + if (im->tm_running) { + del_timer(&im->timer); + im->tm_running=0; + } +} + +static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay) +{ + int tv; + if (im->tm_running) + return; + tv=net_random() % max_delay; + im->timer.expires=jiffies+tv+2; + im->tm_running=1; + add_timer(&im->timer); +} + +/* + * Send an IGMP report. + */ + +#define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4) + +static int igmp_send_report(struct device *dev, u32 group, int type) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct igmphdr *ih; + struct rtable *rt; + u32 dst; + + /* According to IGMPv2 specs, LEAVE messages are + * sent to all-routers group. + */ + dst = group; + if (type == IGMP_HOST_LEAVE_MESSAGE) + dst = IGMP_ALL_ROUTER; + + if (ip_route_output(&rt, dst, 0, 0, dev->ifindex)) + return -1; + if (rt->rt_src == 0) { + ip_rt_put(rt); + return -1; + } + + skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC); + if (skb == NULL) { + ip_rt_put(rt); + return -1; + } + + skb->dst = &rt->u.dst; + + skb_reserve(skb, (dev->hard_header_len+15)&~15); + + skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); + + iph->version = 4; + iph->ihl = (sizeof(struct iphdr)+4)>>2; + iph->tos = 0; + iph->frag_off = 0; + iph->ttl = 1; + iph->daddr = dst; + iph->saddr = rt->rt_src; + iph->protocol = IPPROTO_IGMP; + iph->tot_len = htons(IGMP_SIZE); + iph->id = htons(ip_id_count++); + ((u8*)&iph[1])[0] = IPOPT_RA; + ((u8*)&iph[1])[1] = 4; + ((u8*)&iph[1])[2] = 0; + ((u8*)&iph[1])[3] = 0; + ip_send_check(iph); + + ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + ih->type=type; + ih->code=0; + ih->csum=0; + ih->group=group; + ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + + return skb->dst->output(skb); +} + + +static void igmp_timer_expire(unsigned long data) +{ + struct ip_mc_list *im=(struct ip_mc_list *)data; + struct in_device *in_dev = im->interface; + int err; + + im->tm_running=0; + + if (IGMP_V1_SEEN(in_dev)) + err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); + else + err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT); + + /* Failed. Retry later. */ + if (err) { + igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + return; + } + + if (im->unsolicit_count) { + im->unsolicit_count--; + igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + } + im->reporter = 1; +} + +static void igmp_heard_report(struct in_device *in_dev, u32 group) +{ + struct ip_mc_list *im; + + /* Timers are only set for non-local groups */ + + if (group == IGMP_ALL_HOSTS) + return; + + for (im=in_dev->mc_list; im!=NULL; im=im->next) { + if (im->multiaddr == group) { + igmp_stop_timer(im); + im->reporter = 0; + im->unsolicit_count = 0; + return; + } + } +} + +static void igmp_heard_query(struct in_device *in_dev, unsigned char max_resp_time, + u32 group) +{ + struct ip_mc_list *im; + int max_delay; + + max_delay = max_resp_time*(HZ/IGMP_TIMER_SCALE); + + if (max_resp_time == 0) { + /* Alas, old v1 router presents here. 
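/*
 * Illustrative sketch, not from the original source: the wrap-safe
 * time comparison behind the IGMP_V1_SEEN() macro above.  Computing
 * the difference of two unsigned tick values and testing its sign
 * stays correct across counter wrap-around, provided the two values
 * are less than half the counter range apart.  The 32-bit counter and
 * the function name are assumptions of this sketch.
 */
#include <stdio.h>
#include <stdint.h>

static int before(uint32_t now, uint32_t deadline)
{
	return (int32_t)(now - deadline) < 0;
}

int main(void)
{
	uint32_t now = 0xfffffff0u;		/* just before wrap */
	uint32_t deadline = now + 400;		/* wraps past zero  */

	printf("%d\n", before(now, deadline));		/* 1: still pending */
	printf("%d\n", before(now + 1000, deadline));	/* 0: expired       */
	return 0;
}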
*/ + + max_delay = IGMP_Query_Response_Interval; + in_dev->mr_v1_seen = jiffies + IGMP_V1_Router_Present_Timeout; + group = 0; + } + + /* + * - Start the timers in all of our membership records + * that the query applies to for the interface on + * which the query arrived excl. those that belong + * to a "local" group (224.0.0.X) + * - For timers already running check if they need to + * be reset. + * - Use the igmp->igmp_code field as the maximum + * delay possible + */ + for (im=in_dev->mc_list; im!=NULL; im=im->next) { + if (group && group != im->multiaddr) + continue; + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; + im->unsolicit_count = 0; + if (im->tm_running && (long)(im->timer.expires-jiffies) > max_delay) + igmp_stop_timer(im); + igmp_start_timer(im, max_delay); + } +} + +int igmp_rcv(struct sk_buff *skb, unsigned short len) +{ + /* This basically follows the spec line by line -- see RFC1112 */ + struct igmphdr *ih = skb->h.igmph; + struct in_device *in_dev = skb->dev->ip_ptr; + + if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len) + || in_dev==NULL) { + kfree_skb(skb); + return 0; + } + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_QUERY: + igmp_heard_query(in_dev, ih->code, ih->group); + break; + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMP_HOST_NEW_MEMBERSHIP_REPORT: + /* Is it our report looped back? */ + if (((struct rtable*)skb->dst)->key.iif == 0) + break; + igmp_heard_report(in_dev, ih->group); + break; + case IGMP_PIM: +#ifdef CONFIG_IP_PIMSM_V1 + return pim_rcv_v1(skb, len); +#endif + case IGMP_DVMRP: + case IGMP_TRACE: + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_MTRACE: + case IGMP_MTRACE_RESP: + break; + default: + NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); + } + kfree_skb(skb); + return 0; +} + +#endif + + +/* + * Add a filter to a device + */ + +static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) +{ + char buf[MAX_ADDR_LEN]; + struct device *dev = in_dev->dev; + + /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. + We will get multicast token leakage, when IFF_MULTICAST + is changed. This check should be done in dev->set_multicast_list + routine. Something sort of: + if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } + --ANK + */ + if (arp_mc_map(addr, buf, dev, 0) == 0) + dev_mc_add(dev,buf,dev->addr_len,0); +} + +/* + * Remove a filter from a device + */ + +static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) +{ + char buf[MAX_ADDR_LEN]; + struct device *dev = in_dev->dev; + + if (arp_mc_map(addr, buf, dev, 0) == 0) + dev_mc_delete(dev,buf,dev->addr_len,0); +} + +static void igmp_group_dropped(struct ip_mc_list *im) +{ + if (im->loaded) { + im->loaded = 0; + ip_mc_filter_del(im->interface, im->multiaddr); + } + +#ifdef CONFIG_IP_MULTICAST + if (im->multiaddr == IGMP_ALL_HOSTS) + return; + + start_bh_atomic(); + igmp_stop_timer(im); + end_bh_atomic(); + + if (im->reporter && !IGMP_V1_SEEN(im->interface)) + igmp_send_report(im->interface->dev, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE); +#endif +} + +static void igmp_group_added(struct ip_mc_list *im) +{ + if (im->loaded == 0) { + im->loaded = 1; + ip_mc_filter_add(im->interface, im->multiaddr); + } + +#ifdef CONFIG_IP_MULTICAST + if (im->multiaddr == IGMP_ALL_HOSTS) + return; + + start_bh_atomic(); + igmp_start_timer(im, IGMP_Initial_Report_Delay); + end_bh_atomic(); +#endif +} + + +/* + * Multicast list managers + */ + + +/* + * A socket has joined a multicast group on device dev. 
+ */ + +void ip_mc_inc_group(struct in_device *in_dev, u32 addr) +{ + struct ip_mc_list *i, *im; + + im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL); + + for (i=in_dev->mc_list; i; i=i->next) { + if (i->multiaddr == addr) { + i->users++; + if (im) + kfree(im); + return; + } + } + if (!im) + return; + im->users=1; + im->interface=in_dev; + im->multiaddr=addr; +#ifdef CONFIG_IP_MULTICAST + im->tm_running=0; + init_timer(&im->timer); + im->timer.data=(unsigned long)im; + im->timer.function=&igmp_timer_expire; + im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->reporter = 0; + im->loaded = 0; +#endif + im->next=in_dev->mc_list; + in_dev->mc_list=im; + igmp_group_added(im); + if (in_dev->dev->flags & IFF_UP) + ip_rt_multicast_event(in_dev); + return; +} + +/* + * A socket has left a multicast group on device dev + */ + +int ip_mc_dec_group(struct in_device *in_dev, u32 addr) +{ + struct ip_mc_list *i, **ip; + + for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + if (i->multiaddr==addr) { + if (--i->users == 0) { + *ip = i->next; + synchronize_bh(); + + igmp_group_dropped(i); + if (in_dev->dev->flags & IFF_UP) + ip_rt_multicast_event(in_dev); + kfree_s(i, sizeof(*i)); + } + return 0; + } + } + return -ESRCH; +} + +/* Device going down */ + +void ip_mc_down(struct in_device *in_dev) +{ + struct ip_mc_list *i; + + for (i=in_dev->mc_list; i; i=i->next) + igmp_group_dropped(i); + + ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); +} + +/* Device going up */ + +void ip_mc_up(struct in_device *in_dev) +{ + struct ip_mc_list *i; + + ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); + + for (i=in_dev->mc_list; i; i=i->next) + igmp_group_added(i); +} + +/* + * Device is about to be destroyed: clean up. + */ + +void ip_mc_destroy_dev(struct in_device *in_dev) +{ + struct ip_mc_list *i; + + while ((i = in_dev->mc_list) != NULL) { + in_dev->mc_list = i->next; + igmp_group_dropped(i); + kfree_s(i, sizeof(*i)); + } +} + +static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +{ + struct rtable *rt; + struct device *dev = NULL; + + if (imr->imr_address.s_addr) { + dev = ip_dev_find(imr->imr_address.s_addr); + if (!dev) + return NULL; + } + + if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) { + dev = rt->u.dst.dev; + ip_rt_put(rt); + } + if (dev) { + imr->imr_ifindex = dev->ifindex; + return dev->ip_ptr; + } + return NULL; +} + +/* + * Join a socket to a group + */ +int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; + +int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) +{ + int err; + u32 addr = imr->imr_multiaddr.s_addr; + struct ip_mc_socklist *iml, *i; + struct in_device *in_dev; + int count = 0; + + if (!MULTICAST(addr)) + return -EINVAL; + + rtnl_shlock(); + + if (!imr->imr_ifindex) + in_dev = ip_mc_find_dev(imr); + else + in_dev = inetdev_by_index(imr->imr_ifindex); + + if (!in_dev) { + iml = NULL; + err = -ENODEV; + goto done; + } + + iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); + + err = -EADDRINUSE; + for (i=sk->ip_mc_list; i; i=i->next) { + if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) { + /* New style additions are reference counted */ + if (imr->imr_address.s_addr == 0) { + i->count++; + err = 0; + } + goto done; + } + count++; + } + err = -ENOBUFS; + if (iml == NULL || count >= sysctl_igmp_max_memberships) + goto done; + memcpy(&iml->multi, imr, sizeof(*imr)); + iml->next = sk->ip_mc_list; + iml->count = 1; + sk->ip_mc_list = iml; + ip_mc_inc_group(in_dev, addr); + iml = NULL; + err = 0; +done: + rtnl_shunlock(); + 
if (iml) + sock_kfree_s(sk, iml, sizeof(*iml)); + return err; +} + +/* + * Ask a socket to leave a group. + */ + +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +{ + struct ip_mc_socklist *iml, **imlp; + + for (imlp=&sk->ip_mc_list; (iml=*imlp)!=NULL; imlp=&iml->next) { + if (iml->multi.imr_multiaddr.s_addr==imr->imr_multiaddr.s_addr && + iml->multi.imr_address.s_addr==imr->imr_address.s_addr && + (!imr->imr_ifindex || iml->multi.imr_ifindex==imr->imr_ifindex)) { + struct in_device *in_dev; + if (--iml->count) + return 0; + + *imlp = iml->next; + synchronize_bh(); + + in_dev = inetdev_by_index(iml->multi.imr_ifindex); + if (in_dev) + ip_mc_dec_group(in_dev, imr->imr_multiaddr.s_addr); + sock_kfree_s(sk, iml, sizeof(*iml)); + return 0; + } + } + return -EADDRNOTAVAIL; +} + +/* + * A socket is closing. + */ + +void ip_mc_drop_socket(struct sock *sk) +{ + struct ip_mc_socklist *iml; + + while ((iml=sk->ip_mc_list) != NULL) { + struct in_device *in_dev; + sk->ip_mc_list = iml->next; + if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) + ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); + sock_kfree_s(sk, iml, sizeof(*iml)); + } +} + + +#ifdef CONFIG_IP_MULTICAST + +int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy) +{ + off_t pos=0, begin=0; + struct ip_mc_list *im; + int len=0; + struct device *dev; + + len=sprintf(buffer,"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); + + for(dev = dev_base; dev; dev = dev->next) + { + struct in_device *in_dev = dev->ip_ptr; + char *querier = "NONE"; + + if (in_dev == NULL) + continue; + + querier = IGMP_V1_SEEN(in_dev) ? "V1" : "V2"; + + len+=sprintf(buffer+len,"%d\t%-10s: %5d %7s\n", + dev->ifindex, dev->name, dev->mc_count, querier); + + for (im = in_dev->mc_list; im; im = im->next) { + len+=sprintf(buffer+len, + "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n", + im->multiaddr, im->users, + im->tm_running, im->timer.expires-jiffies, im->reporter); + + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + goto done; + } + } +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if(len<0) + len=0; + return len; +} +#endif + diff --git a/pfinet/linux-src/net/ipv4/ip_forward.c b/pfinet/linux-src/net/ipv4/ip_forward.c new file mode 100644 index 00000000..08ebbc2f --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_forward.c @@ -0,0 +1,297 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The IP forwarding functionality. + * + * Version: $Id: ip_forward.c,v 1.43 1999/03/21 05:22:37 davem Exp $ + * + * Authors: see ip.c + * + * Fixes: + * Many : Split from ip.c , see ip_input.c for + * history. + * Dave Gregorich : NULL ip_rt_put fix for multicast + * routing. + * Jos Vos : Add call_out_firewall before sending, + * use output device for accounting. + * Jos Vos : Call forward firewall after routing + * (always use output device). 
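/*
 * Illustrative sketch, not from the original source: when ip_forward()
 * below lowers the TTL it calls ip_decrease_ttl(), which patches the
 * header checksum incrementally rather than recomputing it.  The
 * RFC 1624 style update shown here is the general form of that
 * adjustment, in host byte order; the toy three-word "header" and the
 * helper names are inventions of this sketch.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* One's-complement sum of 16-bit words, folded to 16 bits. */
static uint16_t ocsum(const uint16_t *w, size_t n)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < n; i++)
		sum += w[i];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/*
 * Given the stored checksum and a single 16-bit word changing from
 * old_w to new_w, return the updated checksum without rescanning the
 * rest of the header (RFC 1624, eqn. 3).
 */
static uint16_t csum_update(uint16_t check, uint16_t old_w, uint16_t new_w)
{
	uint32_t sum = (uint16_t)~check + (uint16_t)~old_w + new_w;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* toy header: version/ihl word, checksum slot, ttl|protocol */
	uint16_t hdr[3] = { 0x4500, 0x0000, 0x4006 };	/* ttl 64, tcp */
	uint16_t old_w;

	hdr[1] = (uint16_t)~ocsum(hdr, 3);		/* fill checksum */

	old_w = hdr[2];
	hdr[2] -= 0x0100;				/* ttl 64 -> 63  */
	hdr[1] = csum_update(hdr[1], old_w, hdr[2]);

	/* A correct header sums to 0xffff, i.e. verifies to zero. */
	printf("%s\n", ocsum(hdr, 3) == 0xffff ? "checksum OK" : "BAD");
	return 0;
}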
+ * Mike McLagan : Routing by source + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netdevice.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/firewall.h> +#include <linux/ip_fw.h> +#ifdef CONFIG_IP_MASQUERADE +#include <net/ip_masq.h> +#endif +#include <net/checksum.h> +#include <linux/route.h> +#include <net/route.h> + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* + * Check the packet against our socket administration to see + * if it is related to a connection on our system. + * Needed for transparent proxying. + */ + +int ip_chksock(struct sk_buff *skb) +{ + switch (skb->nh.iph->protocol) { + case IPPROTO_ICMP: + return icmp_chkaddr(skb); + case IPPROTO_TCP: + return tcp_chkaddr(skb); + case IPPROTO_UDP: + return udp_chkaddr(skb); + default: + return 0; + } +} +#endif + + +int ip_forward(struct sk_buff *skb) +{ + struct device *dev2; /* Output device */ + struct iphdr *iph; /* Our header */ + struct rtable *rt; /* Route we use */ + struct ip_options * opt = &(IPCB(skb)->opt); + unsigned short mtu; +#if defined(CONFIG_FIREWALL) || defined(CONFIG_IP_MASQUERADE) + int fw_res = 0; +#endif + + if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) + return 0; + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + /* + * According to the RFC, we must first decrease the TTL field. If + * that reaches zero, we must reply an ICMP control message telling + * that the packet's lifetime expired. + */ + + iph = skb->nh.iph; + rt = (struct rtable*)skb->dst; + +#ifdef CONFIG_CPU_IS_SLOW + if (net_cpu_congestion > 1 && !(iph->tos&IPTOS_RELIABILITY) && + IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) { + if (((xtime.tv_usec&0xF)<<net_cpu_congestion) > 0x1C) + goto drop; + } +#endif + + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (ip_chksock(skb)) + goto local_pkt; +#endif + + if (iph->ttl <= 1) + goto too_many_hops; + + if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto sr_failed; + + /* + * Having picked a route we can now send the frame out + * after asking the firewall permission to do so. + */ + + skb->priority = rt_tos2priority(iph->tos); + dev2 = rt->u.dst.dev; + mtu = rt->u.dst.pmtu; + +#ifdef CONFIG_NET_SECURITY + call_fw_firewall(PF_SECURITY, dev2, NULL, &mtu, NULL); +#endif + + /* + * We now generate an ICMP HOST REDIRECT giving the route + * we calculated. + */ + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) + ip_rt_send_redirect(skb); + + /* We are about to mangle packet. Copy it! */ + if ((skb = skb_cow(skb, dev2->hard_header_len)) == NULL) + return -1; + iph = skb->nh.iph; + opt = &(IPCB(skb)->opt); + + /* Decrease ttl after skb cow done */ + ip_decrease_ttl(iph); + + /* + * We now may allocate a new buffer, and copy the datagram into it. + * If the indicated interface is up and running, kick it. + */ + + if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF)) + goto frag_needed; + +#ifdef CONFIG_IP_ROUTE_NAT + if (rt->rt_flags & RTCF_NAT) { + if (ip_do_nat(skb)) { + kfree_skb(skb); + return -1; + } + } +#endif + +#ifdef CONFIG_IP_MASQUERADE + if(!(IPCB(skb)->flags&IPSKB_MASQUERADED)) { + /* + * Check that any ICMP packets are not for a + * masqueraded connection. 
If so rewrite them + * and skip the firewall checks + */ + if (iph->protocol == IPPROTO_ICMP) { + __u32 maddr; +#ifdef CONFIG_IP_MASQUERADE_ICMP + struct icmphdr *icmph = (struct icmphdr *)((char*)iph + (iph->ihl << 2)); + if ((icmph->type==ICMP_DEST_UNREACH)|| + (icmph->type==ICMP_SOURCE_QUENCH)|| + (icmph->type==ICMP_TIME_EXCEEDED)) + { +#endif + maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE); + fw_res = ip_fw_masq_icmp(&skb, maddr); + if (fw_res < 0) { + kfree_skb(skb); + return -1; + } + + if (fw_res) + /* ICMP matched - skip firewall */ + goto skip_call_fw_firewall; +#ifdef CONFIG_IP_MASQUERADE_ICMP + } +#endif + } + if (rt->rt_flags&RTCF_MASQ) + goto skip_call_fw_firewall; +#endif /* CONFIG_IP_MASQUERADE */ + +#ifdef CONFIG_FIREWALL + fw_res=call_fw_firewall(PF_INET, dev2, iph, NULL, &skb); + switch (fw_res) { + case FW_ACCEPT: + case FW_MASQUERADE: + break; + case FW_REJECT: + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* fall thru */ + default: + kfree_skb(skb); + return -1; + } +#endif + +#ifdef CONFIG_IP_MASQUERADE + } + +skip_call_fw_firewall: + /* + * If this fragment needs masquerading, make it so... + * (Don't masquerade de-masqueraded fragments) + */ + if (!(IPCB(skb)->flags&IPSKB_MASQUERADED) && + (fw_res==FW_MASQUERADE || rt->rt_flags&RTCF_MASQ)) { + u32 maddr; + +#ifdef CONFIG_IP_ROUTE_NAT + maddr = (rt->rt_flags&RTCF_MASQ) ? rt->rt_src_map : 0; + + if (maddr == 0) +#endif + maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE); + + if (ip_fw_masquerade(&skb, maddr) < 0) { + kfree_skb(skb); + return -1; + } else { + /* + * Masquerader may have changed skb + */ + iph = skb->nh.iph; + opt = &(IPCB(skb)->opt); + } + } +#endif + + +#ifdef CONFIG_FIREWALL + if ((fw_res = call_out_firewall(PF_INET, dev2, iph, NULL,&skb)) < FW_ACCEPT) { + /* FW_ACCEPT and FW_MASQUERADE are treated equal: + masquerading is only supported via forward rules */ + if (fw_res == FW_REJECT) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + kfree_skb(skb); + return -1; + } +#endif + + ip_statistics.IpForwDatagrams++; + + if (opt->optlen == 0) { +#ifdef CONFIG_NET_FASTROUTE + if (rt->rt_flags&RTCF_FAST && !netdev_fastroute_obstacles) { + unsigned h = ((*(u8*)&rt->key.dst)^(*(u8*)&rt->key.src))&NETDEV_FASTROUTE_HMASK; + /* Time to switch to functional programming :-) */ + dst_release_irqwait(xchg(&skb->dev->fastpath[h], dst_clone(&rt->u.dst))); + } +#endif + ip_send(skb); + return 0; + } + + ip_forward_options(skb); + ip_send(skb); + return 0; + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +local_pkt: + return ip_local_deliver(skb); +#endif + +frag_needed: + ip_statistics.IpFragFails++; + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + goto drop; + +sr_failed: + /* + * Strict routing permits no gatewaying + */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); + goto drop; + +too_many_hops: + /* Tell the sender its packet died... */ + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); +drop: + kfree_skb(skb); + return -1; +} diff --git a/pfinet/linux-src/net/ipv4/ip_fragment.c b/pfinet/linux-src/net/ipv4/ip_fragment.c new file mode 100644 index 00000000..f066e607 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_fragment.c @@ -0,0 +1,593 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The IP fragmentation functionality. 
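+ *		(Fragments are queued per original datagram in a hash of
+ *		struct ipq entries and glued back together by ip_glue()
+ *		once ip_done() reports the datagram complete.)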
+ * + * Version: $Id: ip_fragment.c,v 1.40 1999/03/20 23:58:34 davem Exp $ + * + * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> + * Alan Cox <Alan.Cox@linux.org> + * + * Fixes: + * Alan Cox : Split from ip.c , see ip_input.c for history. + * David S. Miller : Begin massive cleanup... + * Andi Kleen : Add sysctls. + * xxxx : Overlapfrag bug. + * Ultima : ip_expire() kernel panic. + * Bill Hawes : Frag accounting and evictor fixes. + * John McDonald : 0 length frag bug. + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netdevice.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/firewall.h> +#include <linux/ip_fw.h> + +/* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. + */ +int sysctl_ipfrag_high_thresh = 256*1024; +int sysctl_ipfrag_low_thresh = 192*1024; + +int sysctl_ipfrag_time = IP_FRAG_TIME; + +/* Describe an IP fragment. */ +struct ipfrag { + int offset; /* offset of fragment in IP datagram */ + int end; /* last byte of data in datagram */ + int len; /* length of this fragment */ + struct sk_buff *skb; /* complete received fragment */ + unsigned char *ptr; /* pointer into real fragment data */ + struct ipfrag *next; /* linked list pointers */ + struct ipfrag *prev; +}; + +/* Describe an entry in the "incomplete datagrams" queue. */ +struct ipq { + struct iphdr *iph; /* pointer to IP header */ + struct ipq *next; /* linked list pointers */ + struct ipfrag *fragments; /* linked list of received fragments */ + int len; /* total length of original datagram */ + short ihlen; /* length of the IP header */ + struct timer_list timer; /* when will this queue expire? */ + struct ipq **pprev; + struct device *dev; /* Device - for icmp replies */ +}; + +#define IPQ_HASHSZ 64 + +struct ipq *ipq_hash[IPQ_HASHSZ]; + +#define ipqhashfn(id, saddr, daddr, prot) \ + ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) + +atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ + +/* Memory Tracking Functions. */ +extern __inline__ void frag_kfree_skb(struct sk_buff *skb) +{ + atomic_sub(skb->truesize, &ip_frag_mem); + kfree_skb(skb); +} + +extern __inline__ void frag_kfree_s(void *ptr, int len) +{ + atomic_sub(len, &ip_frag_mem); + kfree(ptr); +} + +extern __inline__ void *frag_kmalloc(int size, int pri) +{ + void *vp = kmalloc(size, pri); + + if(!vp) + return NULL; + atomic_add(size, &ip_frag_mem); + return vp; +} + +/* Create a new fragment entry. */ +static struct ipfrag *ip_frag_create(int offset, int end, + struct sk_buff *skb, unsigned char *ptr) +{ + struct ipfrag *fp; + + fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); + if (fp == NULL) + goto out_nomem; + + /* Fill in the structure. */ + fp->offset = offset; + fp->end = end; + fp->len = end - offset; + fp->skb = skb; + fp->ptr = ptr; + fp->next = fp->prev = NULL; + + /* Charge for the SKB as well. 
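+	 * (skb->truesize is added to ip_frag_mem, the counter that
+	 * ip_defrag() compares against sysctl_ipfrag_high_thresh before
+	 * running the evictor.)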
*/ + atomic_add(skb->truesize, &ip_frag_mem); + + return(fp); + +out_nomem: + NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); + return(NULL); +} + +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and return the queue entry address if found. + */ +static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst) +{ + __u16 id = iph->id; + __u32 saddr = iph->saddr; + __u32 daddr = iph->daddr; + __u8 protocol = iph->protocol; + unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); + struct ipq *qp; + + /* Always, we are in a BH context, so no locking. -DaveM */ + for(qp = ipq_hash[hash]; qp; qp = qp->next) { + if(qp->iph->id == id && + qp->iph->saddr == saddr && + qp->iph->daddr == daddr && + qp->iph->protocol == protocol) { + del_timer(&qp->timer); + break; + } + } + return qp; +} + +/* Remove an entry from the "incomplete datagrams" queue, either + * because we completed, reassembled and processed it, or because + * it timed out. + * + * This is called _only_ from BH contexts, on packet reception + * processing and from frag queue expiration timers. -DaveM + */ +static void ip_free(struct ipq *qp) +{ + struct ipfrag *fp; + + /* Stop the timer for this entry. */ + del_timer(&qp->timer); + + /* Remove this entry from the "incomplete datagrams" queue. */ + if(qp->next) + qp->next->pprev = qp->pprev; + *qp->pprev = qp->next; + + /* Release all fragment data. */ + fp = qp->fragments; + while (fp) { + struct ipfrag *xp = fp->next; + + frag_kfree_skb(fp->skb); + frag_kfree_s(fp, sizeof(struct ipfrag)); + fp = xp; + } + + /* Release the IP header. */ + frag_kfree_s(qp->iph, 64 + 8); + + /* Finally, release the queue descriptor itself. */ + frag_kfree_s(qp, sizeof(struct ipq)); +} + +/* + * Oops, a fragment queue timed out. Kill it and send an ICMP reply. + */ +static void ip_expire(unsigned long arg) +{ + struct ipq *qp = (struct ipq *) arg; + + if(!qp->fragments) + { +#ifdef IP_EXPIRE_DEBUG + printk("warning: possible ip-expire attack\n"); +#endif + goto out; + } + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + ip_statistics.IpReasmTimeout++; + ip_statistics.IpReasmFails++; + icmp_send(qp->fragments->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + +out: + /* Nuke the fragment queue. */ + ip_free(qp); +} + +/* Memory limiting on fragments. Evictor trashes the oldest + * fragment queue until we are back under the low threshold. + */ +static void ip_evictor(void) +{ + int i, progress; + +restart: + progress = 0; + /* FIXME: Make LRU queue of frag heads. -DaveM */ + for (i = 0; i < IPQ_HASHSZ; i++) { + struct ipq *qp; + if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh) + return; + /* We are in a BH context, so these queue + * accesses are safe. -DaveM + */ + qp = ipq_hash[i]; + if (qp) { + /* find the oldest queue for this hash bucket */ + while (qp->next) + qp = qp->next; + ip_free(qp); + progress = 1; + } + } + if (progress) + goto restart; + panic("ip_evictor: memcount"); +} + +/* Add an entry to the 'ipq' queue for a newly received IP datagram. + * We will (hopefully :-) receive all other fragments of this datagram + * in time, so we just create a queue for this datagram, in which we + * will insert the received fragments at their respective positions. 
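+ * (Each queue keeps a copy of the IP header plus 8 octets of data and a
+ * timer that ip_defrag() re-arms to sysctl_ipfrag_time on every fragment;
+ * ip_expire() reports a reassembly timeout if it fires first.)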
+ */ +static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) +{ + struct ipq *qp; + unsigned int hash; + int ihlen; + + qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC); + if (qp == NULL) + goto out_nomem; + + /* Allocate memory for the IP header (plus 8 octets for ICMP). */ + ihlen = iph->ihl * 4; + + qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC); + if (qp->iph == NULL) + goto out_free; + + memcpy(qp->iph, iph, ihlen + 8); + qp->len = 0; + qp->ihlen = ihlen; + qp->fragments = NULL; + qp->dev = skb->dev; + + /* Initialize a timer for this entry. */ + init_timer(&qp->timer); + qp->timer.expires = 0; /* (to be set later) */ + qp->timer.data = (unsigned long) qp; /* pointer to queue */ + qp->timer.function = ip_expire; /* expire function */ + + /* Add this entry to the queue. */ + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); + + /* We are in a BH context, no locking necessary. -DaveM */ + if((qp->next = ipq_hash[hash]) != NULL) + qp->next->pprev = &qp->next; + ipq_hash[hash] = qp; + qp->pprev = &ipq_hash[hash]; + + return qp; + +out_free: + frag_kfree_s(qp, sizeof(struct ipq)); +out_nomem: + NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); + return(NULL); +} + +/* See if a fragment queue is complete. */ +static int ip_done(struct ipq *qp) +{ + struct ipfrag *fp; + int offset; + + /* Only possible if we received the final fragment. */ + if (qp->len == 0) + return 0; + + /* Check all fragment offsets to see if they connect. */ + fp = qp->fragments; + offset = 0; + while (fp) { + if (fp->offset > offset) + return(0); /* fragment(s) missing */ + offset = fp->end; + fp = fp->next; + } + + /* All fragments are present. */ + return 1; +} + +/* Build a new IP datagram from all its fragments. + * + * FIXME: We copy here because we lack an effective way of handling lists + * of bits on input. Until the new skb data handling is in I'm not going + * to touch this with a bargepole. + */ +static struct sk_buff *ip_glue(struct ipq *qp) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct ipfrag *fp; + unsigned char *ptr; + int count, len; + + /* Allocate a new buffer for the datagram. */ + len = qp->ihlen + qp->len; + + if(len > 65535) + goto out_oversize; + + skb = dev_alloc_skb(len); + if (!skb) + goto out_nomem; + + /* Fill in the basic details. */ + skb->mac.raw = ptr = skb->data; + skb->nh.iph = iph = (struct iphdr *) skb_put(skb, len); + + /* Copy the original IP headers into the new buffer. */ + memcpy(ptr, qp->iph, qp->ihlen); + ptr += qp->ihlen; + + /* Copy the data portions of all fragments into the new buffer. */ + fp = qp->fragments; + count = qp->ihlen; + while(fp) { + if ((fp->len <= 0) || ((count + fp->len) > skb->len)) + goto out_invalid; + memcpy((ptr + fp->offset), fp->ptr, fp->len); + if (count == qp->ihlen) { + skb->dst = dst_clone(fp->skb->dst); + skb->dev = fp->skb->dev; + } + count += fp->len; + fp = fp->next; + } + + skb->pkt_type = qp->fragments->skb->pkt_type; + skb->protocol = qp->fragments->skb->protocol; + /* + * Clearly bogus, because security markings of the individual + * fragments should have been checked for consistency before + * gluing, and intermediate coalescing of fragments may have + * taken place in ip_defrag() before ip_glue() ever got called. + * If we're not going to do the consistency checking, we might + * as well take the value associated with the first fragment. + * --rct + */ + skb->security = qp->fragments->skb->security; + + /* Done with all fragments. Fixup the new IP header. 
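+	 * (frag_off is cleared and tot_len rewritten to the reassembled
+	 * length, so the result looks like an ordinary unfragmented
+	 * datagram to the rest of the stack.)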
*/ + iph = skb->nh.iph; + iph->frag_off = 0; + iph->tot_len = htons(count); + ip_statistics.IpReasmOKs++; + return skb; + +out_invalid: + NETDEBUG(printk(KERN_ERR + "Invalid fragment list: Fragment over size.\n")); + kfree_skb(skb); + goto out_fail; +out_nomem: + NETDEBUG(printk(KERN_ERR + "IP: queue_glue: no memory for gluing queue %p\n", + qp)); + goto out_fail; +out_oversize: + if (net_ratelimit()) + printk(KERN_INFO + "Oversized IP packet from %d.%d.%d.%d.\n", + NIPQUAD(qp->iph->saddr)); +out_fail: + ip_statistics.IpReasmFails++; + return NULL; +} + +/* Process an incoming IP datagram fragment. */ +struct sk_buff *ip_defrag(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct ipfrag *prev, *next, *tmp, *tfp; + struct ipq *qp; + unsigned char *ptr; + int flags, offset; + int i, ihl, end; + + ip_statistics.IpReasmReqds++; + + /* Start by cleaning up the memory. */ + if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) + ip_evictor(); + + /* + * Look for the entry for this IP datagram in the + * "incomplete datagrams" queue. If found, the + * timer is removed. + */ + qp = ip_find(iph, skb->dst); + + /* Is this a non-fragmented datagram? */ + offset = ntohs(iph->frag_off); + flags = offset & ~IP_OFFSET; + offset &= IP_OFFSET; + + offset <<= 3; /* offset is in 8-byte chunks */ + ihl = iph->ihl * 4; + + /* + * Check whether to create a fresh queue entry. If the + * queue already exists, its timer will be restarted as + * long as we continue to receive fragments. + */ + if (qp) { + /* ANK. If the first fragment is received, + * we should remember the correct IP header (with options) + */ + if (offset == 0) { + /* Fragmented frame replaced by unfragmented copy? */ + if ((flags & IP_MF) == 0) + goto out_freequeue; + qp->ihlen = ihl; + memcpy(qp->iph, iph, (ihl + 8)); + } + } else { + /* Fragmented frame replaced by unfragmented copy? */ + if ((offset == 0) && ((flags & IP_MF) == 0)) + goto out_skb; + + /* If we failed to create it, then discard the frame. */ + qp = ip_create(skb, iph); + if (!qp) + goto out_freeskb; + } + + /* Attempt to construct an oversize packet. */ + if((ntohs(iph->tot_len) + ((int) offset)) > 65535) + goto out_oversize; + + /* Determine the position of this fragment. */ + end = offset + ntohs(iph->tot_len) - ihl; + + /* Is this the final fragment? */ + if ((flags & IP_MF) == 0) + qp->len = end; + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for(next = qp->fragments; next != NULL; next = next->next) { + if (next->offset >= offset) + break; /* bingo! */ + prev = next; + } + + /* Point into the IP datagram 'data' part. */ + ptr = skb->data + ihl; + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. + */ + if ((prev != NULL) && (offset < prev->end)) { + i = prev->end - offset; + offset += i; /* ptr into datagram */ + ptr += i; /* ptr into fragment data */ + } + + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + for (tmp = next; tmp != NULL; tmp = tfp) { + tfp = tmp->next; + if (tmp->offset >= end) + break; /* no overlaps at all */ + + i = end - next->offset; /* overlap is 'i' bytes */ + tmp->len -= i; /* so reduce size of */ + tmp->offset += i; /* next fragment */ + tmp->ptr += i; + + /* If we get a frag size of <= 0, remove it and the packet + * that it goes with. 
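+		 * (A queued fragment completely overlapped by the new data
+		 * is unlinked and its skb freed; cf. the 0-length-fragment
+		 * fix noted in the changelog above.)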
+ */ + if (tmp->len <= 0) { + if (tmp->prev != NULL) + tmp->prev->next = tmp->next; + else + qp->fragments = tmp->next; + + if (tmp->next != NULL) + tmp->next->prev = tmp->prev; + + /* We have killed the original next frame. */ + next = tfp; + + frag_kfree_skb(tmp->skb); + frag_kfree_s(tmp, sizeof(struct ipfrag)); + } + } + + /* + * Create a fragment to hold this skb. + * No memory to save the fragment? throw the lot ... + */ + tfp = ip_frag_create(offset, end, skb, ptr); + if (!tfp) + goto out_freeskb; + + /* Insert this fragment in the chain of fragments. */ + tfp->prev = prev; + tfp->next = next; + if (prev != NULL) + prev->next = tfp; + else + qp->fragments = tfp; + + if (next != NULL) + next->prev = tfp; + + /* OK, so we inserted this new fragment into the chain. + * Check if we now have a full IP datagram which we can + * bump up to the IP layer... + */ + if (ip_done(qp)) { + /* Glue together the fragments. */ + skb = ip_glue(qp); + /* Free the queue entry. */ +out_freequeue: + ip_free(qp); +out_skb: + return skb; + } + + /* + * The queue is still active ... reset its timer. + */ +out_timer: + mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time); /* ~ 30 seconds */ +out: + return NULL; + + /* + * Error exits ... we need to reset the timer if there's a queue. + */ +out_oversize: + if (net_ratelimit()) + printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", + NIPQUAD(iph->saddr)); + /* the skb isn't in a fragment, so fall through to free it */ +out_freeskb: + kfree_skb(skb); + ip_statistics.IpReasmFails++; + if (qp) + goto out_timer; + goto out; +} diff --git a/pfinet/linux-src/net/ipv4/ip_fw.c b/pfinet/linux-src/net/ipv4/ip_fw.c new file mode 100644 index 00000000..99a91d53 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_fw.c @@ -0,0 +1,1759 @@ +/* + * This code is heavily based on the code on the old ip_fw.c code; see below for + * copyrights and attributions of the old code. This code is basically GPL. + * + * 15-Aug-1997: Major changes to allow graphs for firewall rules. + * Paul Russell <Paul.Russell@rustcorp.com.au> and + * Michael Neuling <Michael.Neuling@rustcorp.com.au> + * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). + * Added explicit RETURN from chains. + * Removed TOS mangling (done in ipchains 1.0.1). + * Fixed read & reset bug by reworking proc handling. + * Paul Russell <Paul.Russell@rustcorp.com.au> + * 28-Sep-1997: Added packet marking for net sched code. + * Removed fw_via comparisons: all done on device name now, + * similar to changes in ip_fw.c in DaveM's CVS970924 tree. + * Paul Russell <Paul.Russell@rustcorp.com.au> + * 2-Nov-1997: Moved types across to __u16, etc. + * Added inverse flags. + * Fixed fragment bug (in args to port_match). + * Changed mark to only one flag (MARKABS). + * 21-Nov-1997: Added ability to test ICMP code. + * 19-Jan-1998: Added wildcard interfaces. + * 6-Feb-1998: Merged 2.0 and 2.1 versions. + * Initialised ip_masq for 2.0.x version. + * Added explicit NETLINK option for 2.1.x version. + * Added packet and byte counters for policy matches. + * 26-Feb-1998: Fixed race conditions, added SMP support. + * 18-Mar-1998: Fix SMP, fix race condition fix. + * 1-May-1998: Remove caching of device pointer. + * 12-May-1998: Allow tiny fragment case for TCP/UDP. + * 15-May-1998: Treat short packets as fragments, don't just block. + * 3-Jan-1999: Fixed serious procfs security hole -- users should never + * be allowed to view the chains! 
+ * Marc Santoro <ultima@snicker.emoti.com> + * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash + * during dump_packet. --RR. + * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num + * printed in log (modified from Michael Hasenstein's patch). + * Added SYN in log message. --RR + * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998. + * John McDonald <jm@dataprotect.com> + * Thomas Lopatic <tl@dataprotect.com> + */ + +/* + * + * The origina Linux port was done Alan Cox, with changes/fixes from + * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan + * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. + * + * Copyright from the original FreeBSD version follows: + * + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. */ + + +#include <linux/config.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/icmp.h> +#include <linux/udp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <linux/netlink.h> +#include <linux/init.h> +#include <linux/firewall.h> +#include <linux/ip_fw.h> + +#ifdef CONFIG_IP_MASQUERADE +#include <net/ip_masq.h> +#endif + +#include <net/checksum.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> + +/* Understanding locking in this code: (thanks to Alan Cox for using + * little words to explain this to me). -- PR + * + * In UP, there can be two packets traversing the chains: + * 1) A packet from the current userspace context + * 2) A packet off the bh handlers (timer or net). + * + * For SMP (kernel v2.1+), multiply this by # CPUs. + * + * [Note that this in not correct for 2.2 - because the socket code always + * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs) + * only run on one CPU at a time. This will probably change for 2.3. + * It is still good to use spinlocks because that avoids the global cli() + * for updating the tables, which is rather costly in SMP kernels -AK] + * + * This means counters and backchains can get corrupted if no precautions + * are taken. + * + * To actually alter a chain on UP, we need only do a cli(), as this will + * stop a bh handler firing, as we are in the current userspace context + * (coming from a setsockopt()). + * + * On SMP, we need a write_lock_irqsave(), which is a simple cli() in + * UP. + * + * For backchains and counters, we use an array, indexed by + * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of + * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So, + * confident of uniqueness, we modify counters even though we only + * have a read lock (to read the counters, you need a write lock, + * though). */ + +/* Why I didn't use straight locking... 
-- PR + * + * The backchains can be separated out of the ip_chains structure, and + * allocated as needed inside ip_fw_check(). + * + * The counters, however, can't. Trying to lock these means blocking + * interrupts every time we want to access them. This would suck HARD + * performance-wise. Not locking them leads to possible corruption, + * made worse on 32-bit machines (counters are 64-bit). */ + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ +/*#define DEBUG_IP_FIREWALL_LOCKING*/ + +#ifdef CONFIG_IP_FIREWALL_NETLINK +static struct sock *ipfwsk; +#endif + +#ifdef __SMP__ +#define SLOT_NUMBER() (cpu_number_map[smp_processor_id()]*2 + !in_interrupt()) +#else +#define SLOT_NUMBER() (!in_interrupt()) +#endif +#define NUM_SLOTS (smp_num_cpus*2) + +#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \ + + NUM_SLOTS*sizeof(struct ip_reent)) +#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \ + + NUM_SLOTS*sizeof(struct ip_counters)) + +#ifdef DEBUG_IP_FIREWALL_LOCKING +static unsigned int fwc_rlocks, fwc_wlocks; +#define FWC_DEBUG_LOCK(d) \ +do { \ + FWC_DONT_HAVE_LOCK(d); \ + d |= (1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DEBUG_UNLOCK(d) \ +do { \ + FWC_HAVE_LOCK(d); \ + d &= ~(1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DONT_HAVE_LOCK(d) \ +do { \ + if ((d) & (1 << SLOT_NUMBER())) \ + printk("%s:%i: Got lock on %i already!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while(0) + +#define FWC_HAVE_LOCK(d) \ +do { \ + if (!((d) & (1 << SLOT_NUMBER()))) \ + printk("%s:%i:No lock on %i!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while (0) + +#else +#define FWC_DEBUG_LOCK(d) do { } while(0) +#define FWC_DEBUG_UNLOCK(d) do { } while(0) +#define FWC_DONT_HAVE_LOCK(d) do { } while(0) +#define FWC_HAVE_LOCK(d) do { } while(0) +#endif /*DEBUG_IP_FIRWALL_LOCKING*/ + +#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0) +#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0) +#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0) +#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0) +#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0) +#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0) +#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0) +#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0) + +struct ip_chain; + +struct ip_counters +{ + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +struct ip_fwkernel +{ + struct ip_fw ipfw; + struct ip_fwkernel *next; /* where to go next if current + * rule doesn't match */ + struct ip_chain *branch; /* which branch to jump to if + * current rule matches */ + int simplebranch; /* Use this if branch == NULL */ + struct ip_counters counters[0]; /* Actually several of these */ +}; + +struct ip_reent +{ + struct ip_chain *prevchain; /* Pointer to referencing chain */ + struct ip_fwkernel *prevrule; /* Pointer to referencing rule */ + struct ip_counters counters; +}; + +struct ip_chain +{ + ip_chainlabel label; /* Defines the label for each block */ + struct ip_chain *next; /* Pointer to next block */ + struct ip_fwkernel *chain; /* Pointer to first rule in block */ + __u32 refcount; /* Number of 
refernces to block */ + int policy; /* Default rule for chain. Only * + * used in built in chains */ + struct ip_reent reent[0]; /* Actually several of these */ +}; + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +/* Lock around ip_fw_chains linked list structure */ +rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED; + +/* Head of linked list of fw rules */ +static struct ip_chain *ip_fw_chains; + +#define IP_FW_INPUT_CHAIN ip_fw_chains +#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next) +#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next) + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +extern inline int port_match(__u16 min, __u16 max, __u16 port, + int frag, int invert) +{ + if (frag) /* Fragments fail ANY port test. */ + return (min == 0 && max == 0xFFFF); + else return (port >= min && port <= max) ^ invert; +} + +/* Returns whether matches rule or not. */ +static int ip_rule_match(struct ip_fwkernel *f, + const char *ifname, + struct iphdr *ip, + char tcpsyn, + __u16 src_port, __u16 dst_port, + char isfrag) +{ +#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg)) + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr, + IP_FW_INV_SRCIP) + || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr, + IP_FW_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, + f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, + f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* + * Look for a VIA device match + */ + if (f->ipfw.fw_flg & IP_FW_F_WILDIF) { + if (FWINV(strncmp(ifname, f->ipfw.fw_vianame, + strlen(f->ipfw.fw_vianame)) != 0, + IP_FW_INV_VIA)) { + dprintf("Wildcard interface mismatch.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : ""); + return 0; /* Mismatch */ + } + } + else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0, + IP_FW_INV_VIA)) { + dprintf("Interface name does not match.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA + ? " (INV)" : ""); + return 0; /* Mismatch */ + } + + /* + * Ok the chain addresses match. + */ + + /* If we have a fragment rule but the packet is not a fragment + * the we return zero */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : ""); + return 0; + } + + /* Fragment NEVER passes a SYN test, even an inverted one. */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN) + || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) { + dprintf("Rule requires SYN and packet has no SYN.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : ""); + return 0; + } + + if (f->ipfw.fw_proto) { + /* + * Specific firewall - packet's protocol + * must match firewall's. 
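+		 * (FWINV() xors each test with the rule's IP_FW_INV_* flag,
+		 * so any of the checks below can be negated per rule.)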
+ */ + + if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, f->ipfw.fw_proto, + f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":""); + return 0; + } + + /* For non TCP/UDP/ICMP, port range is max anyway. */ + if (!port_match(f->ipfw.fw_spts[0], + f->ipfw.fw_spts[1], + src_port, isfrag, + !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT)) + || !port_match(f->ipfw.fw_dpts[0], + f->ipfw.fw_dpts[1], + dst_port, isfrag, + !!(f->ipfw.fw_invflg + &IP_FW_INV_DSTPT))) { + dprintf("Port match failed.\n"); + return 0; + } + } + + dprintf("Match succeeded.\n"); + return 1; +} + +static const char *branchname(struct ip_chain *branch,int simplebranch) +{ + if (branch) + return branch->label; + switch (simplebranch) + { + case FW_BLOCK: return IP_FW_LABEL_BLOCK; + case FW_ACCEPT: return IP_FW_LABEL_ACCEPT; + case FW_REJECT: return IP_FW_LABEL_REJECT; + case FW_REDIRECT: return IP_FW_LABEL_REDIRECT; + case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE; + case FW_SKIP: return "-"; + case FW_SKIP+1: return IP_FW_LABEL_RETURN; + default: + return "UNKNOWN"; + } +} + +/* + * VERY ugly piece of code which actually + * makes kernel printf for matching packets... + */ +static void dump_packet(const struct iphdr *ip, + const char *ifname, + struct ip_fwkernel *f, + const ip_chainlabel chainlabel, + __u16 src_port, + __u16 dst_port, + unsigned int count, + int syn) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + + if (f) + { + printk(KERN_INFO "Packet log: %s ",chainlabel); + + printk("%s ",branchname(f->branch,f->simplebranch)); + if (f->simplebranch==FW_REDIRECT) + printk("%d ",f->ipfw.fw_redirpt); + } + + printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ifname, ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count); +} + +/* function for checking chain labels for user space. */ +static int check_label(ip_chainlabel label) +{ + unsigned int i; + /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */ + for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++) + if (label[i] == '\0') return 1; + + return 0; +} + +/* This function returns a pointer to the first chain with a label + * that matches the one given. */ +static struct ip_chain *find_label(ip_chainlabel label) +{ + struct ip_chain *tmp; + FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); + for (tmp = ip_fw_chains; tmp; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + break; + return tmp; +} + +/* This function returns a boolean which when true sets answer to one + of the FW_*. 
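+   (An empty label is a pass-through rule and yields FW_SKIP, while
+   IP_FW_LABEL_RETURN yields FW_SKIP+1, which ip_fw_check() treats as
+   falling off the end of the chain.)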
*/ +static int find_special(ip_chainlabel label, int *answer) +{ + if (label[0] == '\0') { + *answer = FW_SKIP; /* => pass-through rule */ + return 1; + } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { + *answer = FW_ACCEPT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { + *answer = FW_BLOCK; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { + *answer = FW_REJECT; + return 1; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { + *answer = FW_REDIRECT; + return 1; +#endif +#ifdef CONFIG_IP_MASQUERADE + } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { + *answer = FW_MASQUERADE; + return 1; +#endif + } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { + *answer = FW_SKIP+1; + return 1; + } else { + return 0; + } +} + +/* This function cleans up the prevchain and prevrule. If the verbose + * flag is set then he names of the chains will be printed as it + * cleans up. */ +static void cleanup(struct ip_chain *chain, + const int verbose, + unsigned int slot) +{ + struct ip_chain *tmpchain = chain->reent[slot].prevchain; + if (verbose) + printk(KERN_ERR "Chain backtrace: "); + while (tmpchain) { + if (verbose) + printk("%s<-",chain->label); + chain->reent[slot].prevchain = NULL; + chain = tmpchain; + tmpchain = chain->reent[slot].prevchain; + } + if (verbose) + printk("%s\n",chain->label); +} + +static inline int +ip_fw_domatch(struct ip_fwkernel *f, + struct iphdr *ip, + const char *rif, + const ip_chainlabel label, + struct sk_buff *skb, + unsigned int slot, + __u16 src_port, __u16 dst_port, + unsigned int count, + int tcpsyn) +{ + f->counters[slot].bcnt+=ntohs(ip->tot_len); + f->counters[slot].pcnt++; + if (f->ipfw.fw_flg & IP_FW_F_PRN) { + dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn); + } + ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; + +/* This functionality is useless in stock 2.0.x series, but we don't + * discard the mark thing altogether, to avoid breaking ipchains (and, + * more importantly, the ipfwadm wrapper) --PR */ + if (f->ipfw.fw_flg & IP_FW_F_MARKABS) + skb->fwmark = f->ipfw.fw_mark; + else + skb->fwmark+=f->ipfw.fw_mark; +#ifdef CONFIG_IP_FIREWALL_NETLINK + if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { + size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) + + sizeof(__u32) + sizeof(skb->fwmark) + IFNAMSIZ; + struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); + + duprintf("Sending packet out NETLINK (length = %u).\n", + (unsigned int)len); + if (outskb) { + /* Prepend length, mark & interface */ + skb_put(outskb, len); + *((__u32 *)outskb->data) = (__u32)len; + *((__u32 *)(outskb->data+sizeof(__u32))) = skb->fwmark; + strcpy(outskb->data+sizeof(__u32)*2, rif); + memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, + len-(sizeof(__u32)*2+IFNAMSIZ)); + netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL); + } + else { + if (net_ratelimit()) + printk(KERN_WARNING "ip_fw: packet drop due to " + "netlink failure\n"); + return 0; + } + } +#endif + return 1; +} + +/* + * Returns one of the generic firewall policies, like FW_ACCEPT. + * + * The testing is either false for normal firewall mode or true for + * user checking mode (counters are not updated, TOS & mark not done). 
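+ * (In testing mode the result is mapped onto errno values by the
+ * IP_FW_CHECK case of ip_fw_ctl() further down.)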
+ */ +static int +ip_fw_check(struct iphdr *ip, + const char *rif, + __u16 *redirport, + struct ip_chain *chain, + struct sk_buff *skb, + unsigned int slot, + int testing) +{ + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); + struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port = 0xFFFF, dst_port = 0xFFFF; + char tcpsyn=0; + __u16 offset; + unsigned char oldtos; + struct ip_fwkernel *f; + int ret = FW_SKIP+2; + unsigned int count; + + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + + offset = ntohs(ip->frag_off) & IP_OFFSET; + + /* + * Don't allow a fragment of TCP 8 bytes in. Nobody + * normal causes this. Its a cracker trying to break + * in by doing a flag overwrite to pass the direction + * checks. + */ + + if (offset == 1 && ip->protocol == IPPROTO_TCP) { + if (!testing && net_ratelimit()) { + printk("Suspect TCP fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + + /* If we can't investigate ports, treat as fragment. It's + * either a trucated whole packet, or a truncated first + * fragment, or a TCP first fragment of length 8-15, in which + * case the above rule stops reassembly. + */ + if (offset == 0) { + unsigned int size_req; + switch (ip->protocol) { + case IPPROTO_TCP: + /* Don't care about things past flags word */ + size_req = 16; + break; + + case IPPROTO_UDP: + case IPPROTO_ICMP: + size_req = 8; + break; + + default: + size_req = 0; + } + offset = (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req); + + /* If it is a truncated first fragment then it can be + * used to rewrite port information, and thus should + * be blocked. + */ + if (offset && (ntohs(ip->frag_off) & IP_MF)) { + if (!testing && net_ratelimit()) { + printk("Suspect short first fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + } + + src = ip->saddr; + dst = ip->daddr; + oldtos = ip->tos; + + /* + * If we got interface from which packet came + * we can use the address directly. Linux 2.1 now uses address + * chains per device too, but unlike BSD we first check if the + * incoming packet matches a device address and the routing + * table before calling the firewall. + */ + + dprintf("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf("TCP "); + if (!offset) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + + /* Connection initilisation can only + * be made when the syn bit is set and + * neither of the ack or reset is + * set. 
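+			 * (Fragments never set tcpsyn here, and
+			 * ip_rule_match() refuses to match a SYN rule
+			 * against a fragment in any case.)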
*/ + if(tcp->syn && !(tcp->ack || tcp->rst)) + tcpsyn=1; + } + break; + case IPPROTO_UDP: + dprintf("UDP "); + if (!offset) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + break; + case IPPROTO_ICMP: + if (!offset) { + src_port=(__u16)icmp->type; + dst_port=(__u16)icmp->code; + } + dprintf("ICMP "); + break; + default: + dprintf("p=%d ",ip->protocol); + break; + } +#ifdef DEBUG_IP_FIREWALL + print_ip(ip->saddr); + + if (offset) + dprintf(":fragment (%i) ", ((int)offset)<<2); + else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP + || ip->protocol==IPPROTO_ICMP) + dprintf(":%hu:%hu", src_port, dst_port); + dprintf("\n"); +#endif + + if (!testing) FWC_READ_LOCK(&ip_fw_lock); + else FWC_HAVE_LOCK(fwc_rlocks); + + f = chain->chain; + do { + count = 0; + for (; f; f = f->next) { + count++; + if (ip_rule_match(f,rif,ip, + tcpsyn,src_port,dst_port,offset)) { + if (!testing + && !ip_fw_domatch(f, ip, rif, chain->label, + skb, slot, + src_port, dst_port, + count, tcpsyn)) { + ret = FW_BLOCK; + goto out; + } + break; + } + } + if (f) { + if (f->branch) { + /* Do sanity check to see if we have + * already set prevchain and if so we + * must be in a loop */ + if (f->branch->reent[slot].prevchain) { + if (!testing) { + printk(KERN_ERR + "IP firewall: " + "Loop detected " + "at `%s'.\n", + f->branch->label); + cleanup(chain, 1, slot); + ret = FW_BLOCK; + } else { + cleanup(chain, 0, slot); + ret = FW_SKIP+1; + } + } + else { + f->branch->reent[slot].prevchain + = chain; + f->branch->reent[slot].prevrule + = f->next; + chain = f->branch; + f = chain->chain; + } + } + else if (f->simplebranch == FW_SKIP) + f = f->next; + else if (f->simplebranch == FW_SKIP+1) { + /* Just like falling off the chain */ + goto fall_off_chain; + } + else { + cleanup(chain, 0, slot); + ret = f->simplebranch; + } + } /* f == NULL */ + else { + fall_off_chain: + if (chain->reent[slot].prevchain) { + struct ip_chain *tmp = chain; + f = chain->reent[slot].prevrule; + chain = chain->reent[slot].prevchain; + tmp->reent[slot].prevchain = NULL; + } + else { + ret = chain->policy; + if (!testing) { + chain->reent[slot].counters.pcnt++; + chain->reent[slot].counters.bcnt + += ntohs(ip->tot_len); + } + } + } + } while (ret == FW_SKIP+2); + + out: + if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); + + /* Recalculate checksum if not going to reject, and TOS changed. */ + if (ip->tos != oldtos + && ret != FW_REJECT && ret != FW_BLOCK + && !testing) + ip_send_check(ip); + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (ret == FW_REDIRECT && redirport) { + if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + } +#endif + +#ifdef DEBUG_ALLOW_ALL + return (testing ? ret : FW_ACCEPT); +#else + return ret; +#endif +} + +/* Must have write lock & interrupts off for any of these */ + +/* This function sets all the byte counters in a chain to zero. 
The + * input is a pointer to the chain required for zeroing */ +static int zero_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = chainptr->chain; i; i = i->next) + memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + return 0; +} + +static int clear_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i= chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->chain=NULL; + + while (i) { + struct ip_fwkernel *tmp = i->next; + if (i->branch) + i->branch->refcount--; + kfree(i); + i = tmp; + } + return 0; +} + +static int replace_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + + if (f->branch) f->branch->refcount--; + if (frwl->branch) frwl->branch->refcount++; + + frwl->next = f->next; + memcpy(f,frwl,sizeof(struct ip_fwkernel)); + kfree(frwl); + return 0; +} + +static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Special case if no rules already present */ + if (chainptr->chain == NULL) { + + /* If pointer writes are atomic then turning off + * interupts is not necessary. */ + chainptr->chain = rule; + if (rule->branch) rule->branch->refcount++; + return 0; + } + + /* Find the rule before the end of the chain */ + for (i = chainptr->chain; i->next; i = i->next); + i->next = rule; + if (rule->branch) rule->branch->refcount++; + return 0; +} + +/* This function inserts a rule at the position of position in the + * chain refenced by chainptr. If position is 1 then this rule will + * become the new rule one. */ +static int insert_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + /* special case if the position is number 1 */ + if (position == 1) { + frwl->next = chainptr->chain; + if (frwl->branch) frwl->branch->refcount++; + chainptr->chain = frwl; + return 0; + } + position--; + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + if (frwl->branch) frwl->branch->refcount++; + frwl->next = f->next; + + f->next = frwl; + return 0; +} + +/* This function deletes the a rule from a given rulenum and chain. + * With rulenum = 1 is the first rule is deleted. */ + +static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) +{ + struct ip_fwkernel *i=chainptr->chain,*tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + + if (!chainptr->chain) + return ENOENT; + + /* Need a special case for the first rule */ + if (rulenum == 1) { + /* store temp to allow for freeing up of memory */ + tmp = chainptr->chain; + if (chainptr->chain->branch) chainptr->chain->branch->refcount--; + chainptr->chain = chainptr->chain->next; + kfree(tmp); /* free memory that is now unused */ + } else { + rulenum--; + while (--rulenum && i->next ) i = i->next; + if (!i->next) + return ENOENT; + tmp = i->next; + if (i->next->branch) + i->next->branch->refcount--; + i->next = i->next->next; + kfree(tmp); + } + return 0; +} + + +/* This function deletes the a rule from a given rule and chain. + * The rule that is deleted is the first occursance of that rule. 
*/ +static int del_rule_from_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl) +{ + struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; + int was_found; + + FWC_HAVE_LOCK(fwc_wlocks); + + /* Sure, we should compare marks, but since the `ipfwadm' + * script uses it for an unholy hack... well, life is easier + * this way. We also mask it out of the flags word. --PR */ + for (ltmp=NULL, was_found=0; + !was_found && ftmp != NULL; + ltmp = ftmp,ftmp = ftmp->next) { + if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr + || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr + || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr + || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr +#if 0 + || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg +#else + || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) + != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) +#endif + || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg + || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto +#if 0 + || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark +#endif + || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt + || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] + || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] + || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] + || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] + || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { + duprintf("del_rule_from_chain: mismatch:" + "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " + "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " + "mark:%u/%u " + "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " + "outputsize:%hu-%hu\n", + ftmp->ipfw.fw_src.s_addr, + frwl->ipfw.fw_src.s_addr, + ftmp->ipfw.fw_dst.s_addr, + frwl->ipfw.fw_dst.s_addr, + ftmp->ipfw.fw_smsk.s_addr, + frwl->ipfw.fw_smsk.s_addr, + ftmp->ipfw.fw_dmsk.s_addr, + frwl->ipfw.fw_dmsk.s_addr, + ftmp->ipfw.fw_flg, + frwl->ipfw.fw_flg, + ftmp->ipfw.fw_invflg, + frwl->ipfw.fw_invflg, + ftmp->ipfw.fw_proto, + frwl->ipfw.fw_proto, + ftmp->ipfw.fw_mark, + frwl->ipfw.fw_mark, + ftmp->ipfw.fw_spts[0], + frwl->ipfw.fw_spts[0], + ftmp->ipfw.fw_spts[1], + frwl->ipfw.fw_spts[1], + ftmp->ipfw.fw_dpts[0], + frwl->ipfw.fw_dpts[0], + ftmp->ipfw.fw_dpts[1], + frwl->ipfw.fw_dpts[1], + ftmp->ipfw.fw_outputsize, + frwl->ipfw.fw_outputsize); + continue; + } + + if (strncmp(ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame, + IFNAMSIZ)) { + duprintf("del_rule_from_chain: if mismatch: %s/%s\n", + ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame); + continue; + } + if (ftmp->branch != frwl->branch) { + duprintf("del_rule_from_chain: branch mismatch: " + "%s/%s\n", + ftmp->branch?ftmp->branch->label:"(null)", + frwl->branch?frwl->branch->label:"(null)"); + continue; + } + if (ftmp->branch == NULL + && ftmp->simplebranch != frwl->simplebranch) { + duprintf("del_rule_from_chain: simplebranch mismatch: " + "%i/%i\n", + ftmp->simplebranch, frwl->simplebranch); + continue; + } + was_found = 1; + if (ftmp->branch) + ftmp->branch->refcount--; + if (ltmp) + ltmp->next = ftmp->next; + else + chainptr->chain = ftmp->next; + kfree(ftmp); + break; + } + + if (was_found) + return 0; + else { + duprintf("del_rule_from_chain: no matching rule found\n"); + return EINVAL; + } +} + +/* This function takes the label of a chain and deletes the first + * chain with that name. No special cases required for the built in + * chains as they have their refcount initilised to 1 so that they are + * never deleted. 
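+ * (del_chain() below returns EBUSY for the built-in input chain and for
+ * any chain that is still referenced, and ENOTEMPTY for a chain that
+ * still contains rules.)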
*/ +static int del_chain(ip_chainlabel label) +{ + struct ip_chain *tmp,*tmp2; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Corner case: return EBUSY not ENOENT for first elem ("input") */ + if (strcmp(label, ip_fw_chains->label) == 0) + return EBUSY; + + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if(strcmp(tmp->next->label,label) == 0) + break; + + tmp2 = tmp->next; + if (!tmp2) + return ENOENT; + + if (tmp2->refcount) + return EBUSY; + + if (tmp2->chain) + return ENOTEMPTY; + + tmp->next = tmp2->next; + kfree(tmp2); + return 0; +} + +/* This is a function to initilise a chain. Built in rules start with + * refcount = 1 so that they cannot be deleted. User defined rules + * start with refcount = 0 so they can be deleted. */ +static struct ip_chain *ip_init_chain(ip_chainlabel name, + __u32 ref, + int policy) +{ + unsigned int i; + struct ip_chain *label + = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); + if (label == NULL) + panic("Can't kmalloc for firewall chains.\n"); + strcpy(label->label,name); + label->next = NULL; + label->chain = NULL; + label->refcount = ref; + label->policy = policy; + for (i = 0; i < smp_num_cpus*2; i++) { + label->reent[i].counters.pcnt = label->reent[i].counters.bcnt + = 0; + label->reent[i].prevchain = NULL; + label->reent[i].prevrule = NULL; + } + + return label; +} + +/* This is a function for reating a new chain. The chains is not + * created if a chain of the same name already exists */ +static int create_chain(ip_chainlabel label) +{ + struct ip_chain *tmp; + + if (!check_label(label)) + return EINVAL; + + FWC_HAVE_LOCK(fwc_wlocks); + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is + * zero since this is a + * user defined chain * + * and therefore can be + * deleted */ + return 0; +} + +/* This function simply changes the policy on one of the built in + * chains. checking must be done before this is call to ensure that + * chainptr is pointing to one of the three possible chains */ +static int change_policy(struct ip_chain *chainptr, int policy) +{ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->policy = policy; + return 0; +} + +/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also + * performs some checks in the structure. */ +static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno) +{ + struct ip_fwkernel *fwkern; + + if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) { + duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n", + fwuser->ipfw.fw_flg); + *errno = EINVAL; + return NULL; + } + +#ifdef DEBUG_IP_FIREWALL_USER + /* These are sanity checks that don't really matter. + * We can get rid of these once testing is complete. 
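+	 * (They are compiled in only when DEBUG_IP_FIREWALL_USER is
+	 * defined; the port-range check further down is always active.)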
+ */ + if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || fwuser->ipfw.fw_proto != IPPROTO_TCP)) { + duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n"); + *errno = EINVAL; + return NULL; + } + + if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0 + && fwuser->ipfw.fw_redirpt != 0) { + duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n"); + *errno = EINVAL; + return NULL; + } + + if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)) + || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) { + duprintf("convert_ipfw: Can't have INV flag if flag unset!\n"); + *errno = EINVAL; + return NULL; + } + + if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) + && fwuser->ipfw.fw_spts[0] == 0 + && fwuser->ipfw.fw_spts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) + && fwuser->ipfw.fw_dpts[0] == 0 + && fwuser->ipfw.fw_dpts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) + && (fwuser->ipfw.fw_vianame)[0] == '\0') + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP) + && fwuser->ipfw.fw_smsk.s_addr == 0) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP) + && fwuser->ipfw.fw_dmsk.s_addr == 0)) { + duprintf("convert_ipfw: INV flag makes rule unmatchable!\n"); + *errno = EINVAL; + return NULL; + } + + if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG) + && (fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF + || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) { + duprintf("convert_ipfw: Can't test ports or SYN with frag!\n"); + *errno = EINVAL; + return NULL; + } +#endif + + if ((fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || (fwuser->ipfw.fw_proto != IPPROTO_TCP + && fwuser->ipfw.fw_proto != IPPROTO_UDP + && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) { + duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n"); + *errno = EINVAL; + return NULL; + } + + fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL); + if (!fwkern) { + duprintf("convert_ipfw: kmalloc failed!\n"); + *errno = ENOMEM; + return NULL; + } + memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw)); + + if (!find_special(fwuser->label, &fwkern->simplebranch)) { + fwkern->branch = find_label(fwuser->label); + if (!fwkern->branch) { + duprintf("convert_ipfw: chain doesn't exist `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } else if (fwkern->branch == IP_FW_INPUT_CHAIN + || fwkern->branch == IP_FW_FORWARD_CHAIN + || fwkern->branch == IP_FW_OUTPUT_CHAIN) { + duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } + } else + fwkern->branch = NULL; + memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + + /* Handle empty vianame by making it a wildcard */ + if ((fwkern->ipfw.fw_vianame)[0] == '\0') + fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF; + + fwkern->next = NULL; + return fwkern; +} + +int ip_fw_ctl(int cmd, void *m, int len) +{ + int ret; + struct ip_chain *chain; + unsigned long flags; + + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + switch (cmd) { + case IP_FW_FLUSH: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) 
+ ret = ENOENT; + else ret = clear_fw_chain(chain); + break; + + case IP_FW_ZERO: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + ret = ENOENT; + else ret = zero_fw_chain(chain); + break; + + case IP_FW_CHECK: { + struct ip_fwtest *new = m; + struct iphdr *ip; + + /* Don't need write lock. */ + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + if (len != sizeof(struct ip_fwtest) || !check_label(m)) + return EINVAL; + + /* Need readlock to do find_label */ + FWC_READ_LOCK(&ip_fw_lock); + + if ((chain = find_label(new->fwt_label)) == NULL) + ret = ENOENT; + else { + ip = &(new->fwt_packet.fwp_iph); + + if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) { + duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n", + ip->ihl, + sizeof(struct iphdr) / sizeof(int)); + ret = EINVAL; + } + else { + ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame, + NULL, chain, + NULL, SLOT_NUMBER(), 1); + switch (ret) { + case FW_ACCEPT: + ret = 0; break; + case FW_REDIRECT: + ret = ECONNABORTED; break; + case FW_MASQUERADE: + ret = ECONNRESET; break; + case FW_REJECT: + ret = ECONNREFUSED; break; + /* Hack to help diag; these only get + returned when testing. */ + case FW_SKIP+1: + ret = ELOOP; break; + case FW_SKIP: + ret = ENFILE; break; + default: /* FW_BLOCK */ + ret = ETIMEDOUT; break; + } + } + } + FWC_READ_UNLOCK(&ip_fw_lock); + return ret; + } + + case IP_FW_MASQ_TIMEOUTS: { +#ifdef CONFIG_IP_MASQUERADE + ret = ip_fw_masq_timeouts(m, len); +#else + ret = EINVAL; +#endif + } + break; + + case IP_FW_REPLACE: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = replace_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_APPEND: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) + ret = append_to_chain(chain, ip_fwkern); + } + break; + + case IP_FW_INSERT: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = insert_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_DELETE: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) { + ret = del_rule_from_chain(chain, ip_fwkern); + kfree(ip_fwkern); + } + } + break; + + case IP_FW_DELETE_NUM: { + struct ip_fwdelnum *new = m; + + if (len != sizeof(struct ip_fwdelnum) + || !check_label(new->fwd_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwd_label)) == NULL) + ret = ENOENT; + else ret = del_num_from_chain(chain, new->fwd_rulenum); + } + break; + + case IP_FW_CREATECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("create_chain: bad size %i\n", len); + ret 
= EINVAL; + } + else ret = create_chain(m); + } + break; + + case IP_FW_DELETECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("delete_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = del_chain(m); + } + break; + + case IP_FW_POLICY: { + struct ip_fwpolicy *new = m; + + if (len != sizeof(struct ip_fwpolicy) + || !check_label(new->fwp_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwp_label)) == NULL) + ret = ENOENT; + else if (chain != IP_FW_INPUT_CHAIN + && chain != IP_FW_FORWARD_CHAIN + && chain != IP_FW_OUTPUT_CHAIN) { + duprintf("change_policy: can't change policy on user" + " defined chain.\n"); + ret = EINVAL; + } + else { + int pol = FW_SKIP; + find_special(new->fwp_policy, &pol); + + switch(pol) { + case FW_MASQUERADE: + if (chain != IP_FW_FORWARD_CHAIN) { + ret = EINVAL; + break; + } + /* Fall thru... */ + case FW_BLOCK: + case FW_ACCEPT: + case FW_REJECT: + ret = change_policy(chain, pol); + break; + default: + duprintf("change_policy: bad policy `%s'\n", + new->fwp_policy); + ret = EINVAL; + } + } + break; + + } + default: + duprintf("ip_fw_ctl: unknown request %d\n",cmd); + ret = EINVAL; + } + + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; +} + +/* Returns bytes used - doesn't NUL terminate */ +static int dump_rule(char *buffer, + const char *chainlabel, + const struct ip_fwkernel *rule) +{ + int len; + unsigned int i; + __u64 packets = 0, bytes = 0; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = 0; i < NUM_SLOTS; i++) { + packets += rule->counters[i].pcnt; + bytes += rule->counters[i].bcnt; + } + + len=sprintf(buffer, + "%9s " /* Chain name */ + "%08lX/%08lX->%08lX/%08lX " /* Source & Destination IPs */ + "%.16s " /* Interface */ + "%X %X " /* fw_flg and fw_invflg fields */ + "%u " /* Protocol */ + "%-9u %-9u %-9u %-9u " /* Packet & byte counters */ + "%u-%u %u-%u " /* Source & Dest port ranges */ + "A%02X X%02X " /* TOS and and xor masks */ + "%08X " /* Redirection port */ + "%u " /* fw_mark field */ + "%u " /* output size */ + "%9s\n", /* Target */ + chainlabel, + ntohl(rule->ipfw.fw_src.s_addr), + ntohl(rule->ipfw.fw_smsk.s_addr), + ntohl(rule->ipfw.fw_dst.s_addr), + ntohl(rule->ipfw.fw_dmsk.s_addr), + (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-", + rule->ipfw.fw_flg, + rule->ipfw.fw_invflg, + rule->ipfw.fw_proto, + (__u32)(packets >> 32), (__u32)packets, + (__u32)(bytes >> 32), (__u32)bytes, + rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1], + rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], + rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, + rule->ipfw.fw_redirpt, + rule->ipfw.fw_mark, + rule->ipfw.fw_outputsize, + branchname(rule->branch,rule->simplebranch)); + + duprintf("dump_rule: %i bytes done.\n", len); + return len; +} + +/* File offset is actually in records, not bytes. */ +static int ip_chain_procinfo(char *buffer, char **start, + off_t offset, int length, int reset) +{ + struct ip_chain *i; + struct ip_fwkernel *j = ip_fw_chains->chain; + unsigned long flags; + int len = 0; + int last_len = 0; + off_t upto = 0; + + duprintf("Offset starts at %lu\n", offset); + duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); + + /* Need a write lock to lock out ``readers'' which update counters. 
*/ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { + for (j = i->chain; j; j = j->next) { + if (upto == offset) break; + duprintf("Skipping rule in chain `%s'\n", + i->label); + upto++; + } + if (upto == offset) break; + } + + /* Don't init j first time, or once i = NULL */ + for (; i; (void)((i = i->next) && (j = i->chain))) { + duprintf("Dumping chain `%s'\n", i->label); + for (; j; j = j->next, upto++, last_len = len) + { + len += dump_rule(buffer+len, i->label, j); + if (len > length) { + duprintf("Dumped to %i (past %i). " + "Moving back to %i.\n", + len, length, last_len); + len = last_len; + goto outside; + } + else if (reset) + memset(j->counters, 0, + sizeof(struct ip_counters)*NUM_SLOTS); + } + } +outside: + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + buffer[len] = '\0'; + + duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", + len, length, upto); + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start=(char *)((unsigned int)upto-offset); + return len; +} + +static int ip_chain_name_procinfo(char *buffer, char **start, + off_t offset, int length, int reset) +{ + struct ip_chain *i; + int len = 0,last_len = 0; + off_t pos = 0,begin = 0; + unsigned long flags; + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) + { + unsigned int j; + __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; + + for (j = 0; j < NUM_SLOTS; j++) { + packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; + packetsHi += ((i->reent[j].counters.pcnt >> 32) + & 0xFFFFFFFF); + bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; + bytesHi += ((i->reent[j].counters.bcnt >> 32) + & 0xFFFFFFFF); + } + + /* print the label and the policy */ + len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", + i->label,branchname(NULL, i->policy),i->refcount, + packetsHi, packetsLo, bytesHi, bytesLo); + pos=begin+len; + if(pos<offset) { + len=0; + begin=pos; + } + else if(pos>offset+length) { + len = last_len; + break; + } + + last_len = len; + } + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + *start = buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* + * Interface to the generic firewall chains. + */ +int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + /* Locally generated bogus packets by root. <SIGH>. 
*/ + if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr) + || (*pskb)->len < sizeof(struct iphdr)) + return FW_ACCEPT; + return ip_fw_check(phdr, dev->name, + arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +struct firewall_ops ipfw_ops= +{ + NULL, + ipfw_forward_check, + ipfw_input_check, + ipfw_output_check, + PF_INET, + 0 /* We don't even allow a fall through so we are last */ +}; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_ipfwchains_chain = { + PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, + IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_procinfo +}; + +static struct proc_dir_entry proc_net_ipfwchains_chainnames = { + PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, + IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_name_procinfo +}; + +#endif + +__initfunc(void ip_fw_init(void)) +{ +#ifdef DEBUG_IP_FIRWALL_LOCKING + fwc_wlocks = fwc_rlocks = 0; +#endif + + IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); + IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); + IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); + + if(register_firewall(PF_INET,&ipfw_ops)<0) + panic("Unable to register IP firewall.\n"); + +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_ipfwchains_chain); + proc_net_register(&proc_net_ipfwchains_chainnames); +#endif + +#ifdef CONFIG_IP_FIREWALL_NETLINK + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); + if (ipfwsk == NULL) + panic("ip_fw_init: cannot initialize netlink\n"); +#endif +#if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER) + printk("Firewall graphs enabled! Untested kernel coming thru. \n"); +#endif +} diff --git a/pfinet/linux-src/net/ipv4/ip_gre.c b/pfinet/linux-src/net/ipv4/ip_gre.c new file mode 100644 index 00000000..6a7546fd --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_gre.c @@ -0,0 +1,1223 @@ +/* + * Linux NET3: GRE over IP protocol decoder. + * + * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <asm/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ipip.h> +#include <net/arp.h> +#include <net/checksum.h> + +#ifdef CONFIG_IPV6 +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +/* + Problems & solutions + -------------------- + + 1. The most important issue is detecting local dead loops. 
+ They would cause a complete host lockup in transmit, which
+ would be "resolved" by stack overflow or, if queueing is enabled,
+ with infinite looping in net_bh.
+
+ We cannot track such dead loops during route installation,
+ it is an infeasible task. The most general solution would be
+ to keep an skb->encapsulation counter (sort of local ttl),
+ and silently drop the packet when it expires. It is the best
+ solution, but it supposes maintaining a new variable in ALL
+ skbs, even if no tunneling is used.
+
+ Current solution: t->recursion lock breaks dead loops. It looks
+ like the dev->tbusy flag, but I preferred a new variable, because
+ the semantics are different. One day, when hard_start_xmit
+ is multithreaded, we will have to use skb->encapsulation.
+
+
+
+ 2. Networking dead loops would not kill routers, but would really
+ kill the network. The IP hop limit plays the role of "t->recursion" in this case,
+ if we copy it from the packet being encapsulated to the upper header.
+ It is a very good solution, but it introduces two problems:
+
+ - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
+ do not work over tunnels.
+ - traceroute does not work. I planned to relay ICMP from the tunnel,
+ so that this problem would be solved and traceroute output
+ would be even more informative. This idea appeared to be wrong:
+ only Linux complies with rfc1812 now (yes, guys, Linux is the only
+ true router now :-)), all routers (at least, in my neighbourhood)
+ return only 8 bytes of payload. It is the end.
+
+ Hence, if we want OSPF to work or traceroute to say something reasonable,
+ we should search for another solution.
+
+ One of them is to parse the packet, trying to detect inner encapsulation
+ made by our node. It is difficult or even impossible, especially
+ taking into account fragmentation. To be short, it is not a solution at all.
+
+ Current solution: The solution was UNEXPECTEDLY SIMPLE.
+ We force the DF flag on tunnels with a preconfigured hop limit,
+ that is ALL. :-) Well, it does not remove the problem completely,
+ but exponential growth of network traffic is changed to linear
+ (branches that exceed the pmtu are pruned) and the tunnel mtu
+ quickly degrades to a value <68, where looping stops.
+ Yes, it is not good if there is a router in the loop
+ which does not force DF, even when encapsulating packets have DF set.
+ But it is not our problem! Nobody could accuse us; we did
+ all that we could. Even if it is your gated that injected the
+ fatal route into the network, even if it was you who configured the
+ fatal static route: you are innocent. :-)
+
+
+
+ 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
+ practically identical code. It would be good to glue them
+ together, but it is not very evident how to make them modular.
+ sit is an integral part of IPv6; ipip and gre are naturally modular.
+ We could extract the common parts (hash table, ioctl etc.)
+ into a separate module (ip_tunnel.c).
+
+ Alexey Kuznetsov.
+ */
+
+static int ipgre_tunnel_init(struct device *dev);
+
+/* Fallback tunnel: no source, no destination, no key, no options */
+
+static int ipgre_fb_tunnel_init(struct device *dev);
+
+static struct device ipgre_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init,
+};
+
+static struct ip_tunnel ipgre_fb_tunnel = {
+ NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
+};
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. 
if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) + +static struct ip_tunnel *tunnels[4][HASH_SIZE]; + +#define tunnels_r_l (tunnels[3]) +#define tunnels_r (tunnels[2]) +#define tunnels_l (tunnels[1]) +#define tunnels_wc (tunnels[0]) + +/* Given src, dst and key, find approriate for input tunnel. */ + +static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) +{ + unsigned h0 = HASH(remote); + unsigned h1 = HASH(key); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_r[h0^h1]; t; t = t->next) { + if (remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr || + (local == t->parms.iph.daddr && MULTICAST(local))) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_wc[h1]; t; t = t->next) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + if (ipgre_fb_tunnel_dev.flags&IFF_UP) + return &ipgre_fb_tunnel; + return NULL; +} + +static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) +{ + u32 remote = t->parms.iph.daddr; + u32 local = t->parms.iph.saddr; + u32 key = t->parms.i_key; + unsigned h = HASH(key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + + return &tunnels[prio][h]; +} + +static void ipgre_tunnel_link(struct ip_tunnel *t) +{ + struct ip_tunnel **tp = ipgre_bucket(t); + + t->next = *tp; + wmb(); + *tp = t; +} + +static void ipgre_tunnel_unlink(struct ip_tunnel *t) +{ + struct ip_tunnel **tp; + + for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { + if (t == *tp) { + *tp = t->next; + synchronize_bh(); + break; + } + } +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) +{ + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + u32 key = parms->i_key; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = HASH(key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (key == t->parms.i_key) + return t; + } + } + if (!create) + return NULL; + + MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipgre_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; i<100; i++) { + sprintf(dev->name, "gre%d", i); + if (dev_get(dev->name) == NULL) + break; + } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); + } + if (register_netdevice(dev) < 0) + goto failed; + + ipgre_tunnel_link(nt); + /* Do not decrement MOD_USE_COUNT here. 
*/ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + +static void ipgre_tunnel_destroy(struct device *dev) +{ + ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); + + if (dev != &ipgre_fb_tunnel_dev) { + kfree(dev); + MOD_DEC_USE_COUNT; + } +} + + +void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len) +{ +#ifndef I_WISH_WORLD_WERE_PERFECT + +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. + + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft state for keyed + GRE tunnels with enabled checksum. Tell them "thank you". + + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standrads established + by themself??? + */ + + struct iphdr *iph = (struct iphdr*)dp; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int grehlen = (iph->ihl<<2) + 4; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; + u16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (len < grehlen) + return; + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. --ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); + if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) + return; + + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; + + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else + struct iphdr *iph = (struct iphdr*)dp; + struct iphdr *eiph; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + u16 flags; + int grehlen = (iph->ihl<<2) + 4; + struct sk_buff *skb2; + struct rtable *rt; + + if (p[1] != __constant_htons(ETH_P_IP)) + return; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_CSUM) + grehlen += 4; + if (flags&GRE_KEY) + grehlen += 4; + if (flags&GRE_SEQ) + grehlen += 4; + } + if (len < grehlen + sizeof(struct iphdr)) + return; + eiph = (struct iphdr*)(dp + grehlen); + + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? 
+ */ + rel_type = ICMP_PARAMETERPROB; + rel_info = skb->h.icmph->un.gateway - grehlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* And it is the only really necesary thing :-) */ + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < grehlen+68) + return; + rel_info -= grehlen; + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ + if (rel_info > ntohs(eiph->tot_len)) + return; + break; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. --ANK + */ + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + /* Prepare fake skb to feed it to icmp_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)eiph); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + kfree_skb(skb2); + return; + } + skb2->dev = rt->u.dst.dev; + + /* route "incoming" packet */ + if (rt->rt_flags&RTCF_LOCAL) { + ip_rt_put(rt); + rt = NULL; + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + rt->u.dst.dev->type != ARPHRD_IPGRE) { + ip_rt_put(rt); + kfree_skb(skb2); + return; + } + } else { + ip_rt_put(rt); + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + skb2->dst->dev->type != ARPHRD_IPGRE) { + kfree_skb(skb2); + return; + } + } + + /* change mtu on this route */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + if (rel_info > skb2->dst->pmtu) { + kfree_skb(skb2); + return; + } + skb2->dst->pmtu = rel_info; + rel_info = htonl(rel_info); + } else if (type == ICMP_TIME_EXCEEDED) { + struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + if (t->parms.iph.ttl) { + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + } + } + + icmp_send(skb2, rel_type, rel_code, rel_info); + kfree_skb(skb2); +#endif +} + +int ipgre_rcv(struct sk_buff *skb, unsigned short len) +{ + struct iphdr *iph = skb->nh.iph; + u8 *h = skb->h.raw; + u16 flags = *(u16*)h; + u16 csum = 0; + u32 key = 0; + u32 seqno = 0; + struct ip_tunnel *tunnel; + int offset = 4; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop; + + if (flags&GRE_CSUM) { + csum = ip_compute_csum(h, len); + offset += 4; + } + if (flags&GRE_KEY) { + key = *(u32*)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(u32*)(h + offset)); + offset += 4; + } + } + + if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb_pull(skb, h + offset - skb->data); + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->ip_summed = 0; + skb->protocol = *(u16*)(h + 2); + skb->pkt_type = PACKET_HOST; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + /* Looped back packet, drop it! 
*/ + if (((struct rtable*)skb->dst)->key.iif == 0) + goto drop; + tunnel->stat.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->stat.rx_crc_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { + tunnel->stat.rx_fifo_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return(0); + } + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + +drop: + kfree_skb(skb); + return(0); +} + +static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *old_iph = skb->nh.iph; + struct iphdr *tiph; + u8 tos; + u16 df; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + int gre_hlen; + u32 dst; + int mtu; + + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } + + if (dev->hard_header) { + gre_hlen = 0; + tiph = (struct iphdr*)skb->data; + } else { + gre_hlen = tunnel->hlen; + tiph = &tunnel->parms.iph; + } + + if ((dst = tiph->daddr) == 0) { + /* NBMA tunnel */ + + if (skb->dst == NULL) { + tunnel->stat.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + rt = (struct rtable*)skb->dst; + if ((dst = rt->rt_gateway) == 0) + goto tx_error_icmp; + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + int addr_type; + struct neighbour *neigh = skb->dst->neighbour; + + if (neigh == NULL) + goto tx_error; + + addr6 = (struct in6_addr*)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &skb->nh.ipv6h->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + goto tx_error_icmp; + + dst = addr6->s6_addr32[3]; + } +#endif + else + goto tx_error; + } + + tos = tiph->tos; + if (tos&1) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + tos = old_iph->tos; + tos &= ~1; + } + + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->u.dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } + + df = tiph->frag_off; + mtu = rt->u.dst.pmtu - tunnel->hlen; + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68) + skb->dst->pmtu = mtu; + + df |= (old_iph->frag_off&__constant_htons(IP_DF)); + + if ((old_iph->frag_off&__constant_htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + + if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= 
RTF_MODIFIED; + skb->dst->pmtu = mtu; + } + } + + if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + skb->h.raw = skb->nh.raw; + + max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + stats->tx_dropped++; + dev_kfree_skb(skb); + tunnel->recursion--; + return 0; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + dev_kfree_skb(skb); + skb = new_skb; + } + + skb->nh.raw = skb_push(skb, gre_hlen); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. + */ + + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = IPPROTO_GRE; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + + if ((iph->ttl = tiph->ttl) == 0) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + iph->ttl = old_iph->ttl; +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; +#endif + else + iph->ttl = ip_statistics.IpDefaultTTL; + } + + ((u16*)(iph+1))[0] = tunnel->parms.o_flags; + ((u16*)(iph+1))[1] = skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); + } + } + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; + return 0; + +tx_error_icmp: + dst_link_failure(skb); + +tx_error: + stats->tx_errors++; + dev_kfree_skb(skb); + tunnel->recursion--; + return 0; +} + +static int +ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipgre_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + t = ipgre_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if 
(!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + + if (dev != &ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL && + t != &ipgre_fb_tunnel) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned nflags=0; + + t = (struct ip_tunnel*)dev->priv; + + if (MULTICAST(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + start_bh_atomic(); + ipgre_tunnel_unlink(t); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + t->parms.i_key = p.i_key; + t->parms.o_key = p.o_key; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipgre_tunnel_link(t); + end_bh_atomic(); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == &ipgre_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipgre_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +#ifdef CONFIG_NET_IPGRE_BROADCAST +/* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. + + + I have no idea was this bicycle invented before me, + so that I had to set ARPHRD_IPGRE to a random value. + I have an impression, that Cisco could make something similar, + but this feature is apparently missing in IOS<=11.2(8). + + I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks + with broadcast 224.66.66.66. If you have access to mbone, play with me :-) + + ping -t 255 224.66.66.66 + + If nobody answers, mbone does not work. + + ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 + ip addr add 10.66.66.<somewhat>/24 dev Universe + ifconfig Universe up + ifconfig Universe add fe80::<Your_real_addr>/10 + ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 + ftp 10.66.66.66 + ... + ftp fec0:6666:6666::193.233.7.65 + ... + + */ + +static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); + u16 *p = (u16*)(iph+1); + + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. 
+ */ + + if (saddr) + memcpy(&iph->saddr, saddr, 4); + + if (daddr) { + memcpy(&iph->daddr, daddr, 4); + return t->hlen; + } + if (iph->daddr && !MULTICAST(iph->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ipgre_open(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + MOD_INC_USE_COUNT; + if (MULTICAST(t->parms.iph.daddr)) { + struct rtable *rt; + if (ip_route_output(&rt, t->parms.iph.daddr, + t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), + t->parms.link)) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + dev = rt->u.dst.dev; + ip_rt_put(rt); + if (dev->ip_ptr == NULL) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + t->mlink = dev->ifindex; + ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr); + } + return 0; +} + +static int ipgre_close(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + if (MULTICAST(t->parms.iph.daddr) && t->mlink) { + dev = dev_get_by_index(t->mlink); + if (dev && dev->ip_ptr) + ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr); + } + MOD_DEC_USE_COUNT; + return 0; +} + +#endif + +static void ipgre_tunnel_init_gen(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipgre_tunnel_destroy; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev_init_buffers(dev); + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->mtu = 1500 - sizeof(struct iphdr) - 4; + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipgre_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = 1500; + int addend = sizeof(struct iphdr) + 4; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = &tunnel->parms.iph; + + ipgre_tunnel_init_gen(dev); + + /* Guess output device to choose reasonable mtu and hard_header_len */ + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + + dev->flags |= IFF_POINTOPOINT; + +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + if (!iph->saddr) + return -EINVAL; + dev->flags = IFF_BROADCAST; + dev->hard_header = ipgre_header; + dev->open = ipgre_open; + dev->stop = ipgre_close; + } +#endif + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + /* Precalculate GRE options length */ + if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (tunnel->parms.o_flags&GRE_CSUM) + addend += 4; + if (tunnel->parms.o_flags&GRE_KEY) + addend += 4; + if (tunnel->parms.o_flags&GRE_SEQ) + addend += 4; + } + dev->hard_header_len = hlen + addend; + dev->mtu = mtu - addend; + tunnel->hlen = addend; + return 0; +} + +#ifdef MODULE +static int ipgre_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipgre_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int ipgre_fb_tunnel_init(struct device *dev)) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct iphdr *iph; + + 
ipgre_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipgre_fb_tunnel_open; + dev->stop = ipgre_fb_tunnel_close; +#endif + + iph = &ipgre_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_GRE; + iph->ihl = 5; + tunnel->hlen = sizeof(struct iphdr) + 4; + + tunnels_wc[0] = &ipgre_fb_tunnel; + return 0; +} + + +static struct inet_protocol ipgre_protocol = { + ipgre_rcv, /* GRE handler */ + ipgre_err, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_GRE, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "GRE" /* name */ +}; + + +/* + * And now the modules code and kernel interface. + */ + +#ifdef MODULE +int init_module(void) +#else +__initfunc(int ipgre_init(void)) +#endif +{ + printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); + + ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel; + ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipgre_fb_tunnel_dev); +#else + register_netdevice(&ipgre_fb_tunnel_dev); +#endif + + inet_add_protocol(&ipgre_protocol); + return 0; +} + +#ifdef MODULE + +void cleanup_module(void) +{ + if ( inet_del_protocol(&ipgre_protocol) < 0 ) + printk(KERN_INFO "ipgre close: can't remove protocol\n"); + + unregister_netdev(&ipgre_fb_tunnel_dev); +} + +#endif diff --git a/pfinet/linux-src/net/ipv4/ip_input.c b/pfinet/linux-src/net/ipv4/ip_input.c new file mode 100644 index 00000000..7a3e2618 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_input.c @@ -0,0 +1,549 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The Internet Protocol (IP) module. + * + * Version: $Id: ip_input.c,v 1.37 1999/04/22 10:38:36 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Donald Becker, <becker@super.org> + * Alan Cox, <Alan.Cox@linux.org> + * Richard Underwood + * Stefan Becker, <stefanb@yello.ping.de> + * Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * + * + * Fixes: + * Alan Cox : Commented a couple of minor bits of surplus code + * Alan Cox : Undefining IP_FORWARD doesn't include the code + * (just stops a compiler warning). + * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes + * are junked rather than corrupting things. + * Alan Cox : Frames to bad broadcast subnets are dumped + * We used to process them non broadcast and + * boy could that cause havoc. + * Alan Cox : ip_forward sets the free flag on the + * new frame it queues. Still crap because + * it copies the frame but at least it + * doesn't eat memory too. + * Alan Cox : Generic queue code and memory fixes. + * Fred Van Kempen : IP fragment support (borrowed from NET2E) + * Gerhard Koerting: Forward fragmented frames correctly. + * Gerhard Koerting: Fixes to my fix of the above 8-). + * Gerhard Koerting: IP interface addressing fix. + * Linus Torvalds : More robustness checks + * Alan Cox : Even more checks: Still not as robust as it ought to be + * Alan Cox : Save IP header pointer for later + * Alan Cox : ip option setting + * Alan Cox : Use ip_tos/ip_ttl settings + * Alan Cox : Fragmentation bogosity removed + * (Thanks to Mark.Bush@prg.ox.ac.uk) + * Dmitry Gorodchanin : Send of a raw packet crash fix. + * Alan Cox : Silly ip bug when an overlength + * fragment turns up. Now frees the + * queue. 
+ * Linus Torvalds/ : Memory leakage on fragmentation + * Alan Cox : handling. + * Gerhard Koerting: Forwarding uses IP priority hints + * Teemu Rantanen : Fragment problems. + * Alan Cox : General cleanup, comments and reformat + * Alan Cox : SNMP statistics + * Alan Cox : BSD address rule semantics. Also see + * UDP as there is a nasty checksum issue + * if you do things the wrong way. + * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file + * Alan Cox : IP options adjust sk->priority. + * Pedro Roque : Fix mtu/length error in ip_forward. + * Alan Cox : Avoid ip_chk_addr when possible. + * Richard Underwood : IP multicasting. + * Alan Cox : Cleaned up multicast handlers. + * Alan Cox : RAW sockets demultiplex in the BSD style. + * Gunther Mayer : Fix the SNMP reporting typo + * Alan Cox : Always in group 224.0.0.1 + * Pauline Middelink : Fast ip_checksum update when forwarding + * Masquerading support. + * Alan Cox : Multicast loopback error for 224.0.0.1 + * Alan Cox : IP_MULTICAST_LOOP option. + * Alan Cox : Use notifiers. + * Bjorn Ekwall : Removed ip_csum (from slhc.c too) + * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) + * Stefan Becker : Send out ICMP HOST REDIRECT + * Arnt Gulbrandsen : ip_build_xmit + * Alan Cox : Per socket routing cache + * Alan Cox : Fixed routing cache, added header cache. + * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it. + * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. + * Alan Cox : Incoming IP option handling. + * Alan Cox : Set saddr on raw output frames as per BSD. + * Alan Cox : Stopped broadcast source route explosions. + * Alan Cox : Can disable source routing + * Takeshi Sone : Masquerading didn't work. + * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. + * Alan Cox : Memory leaks, tramples, misc debugging. + * Alan Cox : Fixed multicast (by popular demand 8)) + * Alan Cox : Fixed forwarding (by even more popular demand 8)) + * Alan Cox : Fixed SNMP statistics [I think] + * Gerhard Koerting : IP fragmentation forwarding fix + * Alan Cox : Device lock against page fault. + * Alan Cox : IP_HDRINCL facility. + * Werner Almesberger : Zero fragment bug + * Alan Cox : RAW IP frame length bug + * Alan Cox : Outgoing firewall on build_xmit + * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel + * Alan Cox : Multicast routing hooks + * Jos Vos : Do accounting *before* call_in_firewall + * Willy Konynenberg : Transparent proxying support + * + * + * + * To Fix: + * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient + * and could be made very efficient with the addition of some virtual memory hacks to permit + * the allocation of a buffer that can then be 'grown' by twiddling page tables. + * Output fragmentation wants updating along with the buffer management to use a single + * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet + * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause + * fragmentation anyway. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include <linux/net.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/icmp.h> +#include <net/raw.h> +#include <net/checksum.h> +#include <linux/ip_fw.h> +#ifdef CONFIG_IP_MASQUERADE +#include <net/ip_masq.h> +#endif +#include <linux/firewall.h> +#include <linux/mroute.h> +#include <linux/netlink.h> + +/* + * SNMP management statistics + */ + +struct ip_mib ip_statistics={2,IPDEFTTL,}; /* Forwarding=No, Default TTL=64 */ + + +/* + * Handle the issuing of an ioctl() request + * for the ip device. This is scheduled to + * disappear + */ + +int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + + +#if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG) +#define CONFIG_IP_ALWAYS_DEFRAG 1 +#endif + +/* + * 0 - deliver + * 1 - block + */ +static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +{ + int type; + + type = skb->h.icmph->type; + if (type < 32) + return test_bit(type, &sk->tp_pinfo.tp_raw4.filter); + + /* Do not block unknown ICMP types */ + return 0; +} + +/* + * Process Router Attention IP option + */ +int ip_call_ra_chain(struct sk_buff *skb) +{ + struct ip_ra_chain *ra; + u8 protocol = skb->nh.iph->protocol; + struct sock *last = NULL; + + for (ra = ip_ra_chain; ra; ra = ra->next) { + struct sock *sk = ra->sk; + if (sk && sk->num == protocol) { + if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + skb = ip_defrag(skb); + if (skb == NULL) + return 1; + } + if (last) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + raw_rcv(last, skb2); + } + last = sk; + } + } + + if (last) { + raw_rcv(last, skb); + return 1; + } + return 0; +} + +/* + * Deliver IP Packets to the higher protocol layers. + */ +int ip_local_deliver(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct inet_protocol *ipprot; + struct sock *raw_sk=NULL; + unsigned char hash; + int flag = 0; + +#ifndef CONFIG_IP_ALWAYS_DEFRAG + /* + * Reassemble IP fragments. + */ + + if (iph->frag_off & htons(IP_MF|IP_OFFSET)) { + skb = ip_defrag(skb); + if (!skb) + return 0; + iph = skb->nh.iph; + } +#endif + +#ifdef CONFIG_IP_MASQUERADE + /* + * Do we need to de-masquerade this packet? + */ + { + int ret; + /* + * Some masq modules can re-inject packets if + * bad configured. + */ + + if((IPCB(skb)->flags&IPSKB_MASQUERADED)) { + printk(KERN_DEBUG "ip_input(): demasq recursion detected. Check masq modules configuration\n"); + kfree_skb(skb); + return 0; + } + + ret = ip_fw_demasquerade(&skb); + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + if (ret) { + iph=skb->nh.iph; + IPCB(skb)->flags |= IPSKB_MASQUERADED; + dst_release(skb->dst); + skb->dst = NULL; + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) { + kfree_skb(skb); + return 0; + } + return skb->dst->input(skb); + } + } +#endif + + /* + * Point into the IP datagram, just past the header. + */ + + skb->h.raw = skb->nh.raw + iph->ihl*4; + + /* + * Deliver to raw sockets. This is fun as to avoid copies we want to make no + * surplus copies. 
+ * + * RFC 1122: SHOULD pass TOS value up to the transport layer. + * -> It does. And not only TOS, but all IP header. + */ + + /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ + hash = iph->protocol & (MAX_INET_PROTOS - 1); + + /* + * If there maybe a raw socket we must check - if not we don't care less + */ + + if((raw_sk = raw_v4_htable[hash]) != NULL) { + struct sock *sknext = NULL; + struct sk_buff *skb1; + raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); + if(raw_sk) { /* Any raw sockets */ + do { + /* Find the next */ + sknext = raw_v4_lookup(raw_sk->next, iph->protocol, + iph->saddr, iph->daddr, skb->dev->ifindex); + if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) { + if (sknext == NULL) + break; + skb1 = skb_clone(skb, GFP_ATOMIC); + if(skb1) + { + raw_rcv(raw_sk, skb1); + } + } + raw_sk = sknext; + } while(raw_sk!=NULL); + + /* Here either raw_sk is the last raw socket, or NULL if + * none. We deliver to the last raw socket AFTER the + * protocol checks as it avoids a surplus copy. + */ + } + } + + /* + * skb->h.raw now points at the protocol beyond the IP header. + */ + + for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) + { + struct sk_buff *skb2; + + if (ipprot->protocol != iph->protocol) + continue; + /* + * See if we need to make a copy of it. This will + * only be set if more than one protocol wants it. + * and then not for the last one. If there is a pending + * raw delivery wait for that + */ + + if (ipprot->copy || raw_sk) + { + skb2 = skb_clone(skb, GFP_ATOMIC); + if(skb2==NULL) + continue; + } + else + { + skb2 = skb; + } + flag = 1; + + /* + * Pass on the datagram to each protocol that wants it, + * based on the datagram protocol. We should really + * check the protocol handler's return values here... + */ + + ipprot->handler(skb2, ntohs(iph->tot_len) - (iph->ihl * 4)); + } + + /* + * All protocols checked. + * If this packet was a broadcast, we may *not* reply to it, since that + * causes (proven, grin) ARP storms and a leakage of memory (i.e. all + * ICMP reply messages get queued up for transmission...) + */ + + if(raw_sk!=NULL) /* Shift to last raw user */ + { + raw_rcv(raw_sk, skb); + + } + else if (!flag) /* Free and report errors */ + { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + kfree_skb(skb); + } + + return(0); +} + +/* + * Main IP Receive routine. + */ +int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct iphdr *iph = skb->nh.iph; +#ifdef CONFIG_FIREWALL + int fwres; + u16 rport; +#endif /* CONFIG_FIREWALL */ + + /* + * When the interface is in promisc. mode, drop all the crap + * that it receives, do not try to analyse it. + */ + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop; + + ip_statistics.IpInReceives++; + + /* + * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * + * Is the datagram acceptable? + * + * 1. Length at least the size of an ip header + * 2. Version of 4 + * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] + * 4. Doesn't have a bogus length + */ + + if (skb->len < sizeof(struct iphdr)) + goto inhdr_error; + if (iph->ihl < 5 || iph->version != 4 || ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto inhdr_error; + + { + __u32 len = ntohs(iph->tot_len); + if (skb->len < len) + goto inhdr_error; + + /* + * Our transport medium may have padded the buffer out. 
Now we know it + * is IP we can trim to the true length of the frame. + * Note this now means skb->len holds ntohs(iph->tot_len). + */ + + __skb_trim(skb, len); + } + +#ifdef CONFIG_IP_ALWAYS_DEFRAG + /* Won't send ICMP reply, since skb->dst == NULL. --RR */ + if (iph->frag_off & htons(IP_MF|IP_OFFSET)) { + skb = ip_defrag(skb); + if (!skb) + return 0; + iph = skb->nh.iph; + ip_send_check(iph); + } +#endif + +#ifdef CONFIG_FIREWALL + /* + * See if the firewall wants to dispose of the packet. + * + * We can't do ICMP reply or local delivery before routing, + * so we delay those decisions until after route. --RR + */ + fwres = call_in_firewall(PF_INET, dev, iph, &rport, &skb); + if (fwres < FW_ACCEPT && fwres != FW_REJECT) + goto drop; + iph = skb->nh.iph; +#endif /* CONFIG_FIREWALL */ + + /* + * Initialise the virtual path cache for the packet. It describes + * how the packet travels inside Linux networking. + */ + if (skb->dst == NULL) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) + goto drop; +#ifdef CONFIG_CPU_IS_SLOW + if (net_cpu_congestion > 10 && !(iph->tos&IPTOS_RELIABILITY) && + IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) { + goto drop; + } +#endif + } + +#ifdef CONFIG_NET_CLS_ROUTE + if (skb->dst->tclassid) { + u32 idx = skb->dst->tclassid; + ip_rt_acct[idx&0xFF].o_packets++; + ip_rt_acct[idx&0xFF].o_bytes+=skb->len; + ip_rt_acct[(idx>>16)&0xFF].i_packets++; + ip_rt_acct[(idx>>16)&0xFF].i_bytes+=skb->len; + } +#endif + + if (iph->ihl > 5) { + struct ip_options *opt; + + /* It looks as overkill, because not all + IP options require packet mangling. + But it is the easiest for now, especially taking + into account that combination of IP options + and running sniffer is extremely rare condition. + --ANK (980813) + */ + + skb = skb_cow(skb, skb_headroom(skb)); + if (skb == NULL) + return 0; + iph = skb->nh.iph; + + skb->ip_summed = 0; + if (ip_options_compile(NULL, skb)) + goto inhdr_error; + + opt = &(IPCB(skb)->opt); + if (opt->srr) { + struct in_device *in_dev = dev->ip_ptr; + if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) { + if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) + printk(KERN_INFO "source route option %d.%d.%d.%d -> %d.%d.%d.%d\n", + NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + goto drop; + } + if (ip_options_rcv_srr(skb)) + goto drop; + } + } + +#ifdef CONFIG_FIREWALL +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (fwres == FW_REDIRECT && (IPCB(skb)->redirport = rport) != 0) + return ip_local_deliver(skb); +#endif /* CONFIG_IP_TRANSPARENT_PROXY */ + + if (fwres == FW_REJECT) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + goto drop; + } +#endif /* CONFIG_FIREWALL */ + + return skb->dst->input(skb); + +inhdr_error: + ip_statistics.IpInHdrErrors++; +drop: + kfree_skb(skb); + return(0); +} + diff --git a/pfinet/linux-src/net/ipv4/ip_masq.c b/pfinet/linux-src/net/ipv4/ip_masq.c new file mode 100644 index 00000000..0187c58d --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq.c @@ -0,0 +1,2545 @@ +/* + * + * Masquerading functionality + * + * Copyright (c) 1994 Pauline Middelink + * + * $Id: ip_masq.c,v 1.34.2.2 1999/08/07 10:56:28 davem Exp $ + * + * + * See ip_fw.c for original log + * + * Fixes: + * Juan Jose Ciarlante : Modularized application masquerading (see ip_masq_app.c) + * Juan Jose Ciarlante : New struct ip_masq_seq that holds output/input delta seq. 
+ * Juan Jose Ciarlante : Added hashed lookup by proto,maddr,mport and proto,saddr,sport + * Juan Jose Ciarlante : Fixed deadlock if free ports get exhausted + * Juan Jose Ciarlante : Added NO_ADDR status flag. + * Richard Lynch : Added IP Autoforward + * Nigel Metheringham : Added ICMP handling for demasquerade + * Nigel Metheringham : Checksum checking of masqueraded data + * Nigel Metheringham : Better handling of timeouts of TCP conns + * Delian Delchev : Added support for ICMP requests and replys + * Nigel Metheringham : ICMP in ICMP handling, tidy ups, bug fixes, made ICMP optional + * Juan Jose Ciarlante : re-assign maddr if no packet received from outside + * Juan Jose Ciarlante : ported to 2.1 tree + * Juan Jose Ciarlante : reworked control connections + * Steven Clarke : Added Port Forwarding + * Juan Jose Ciarlante : Just ONE ip_masq_new (!) + * Juan Jose Ciarlante : IP masq modules support + * Juan Jose Ciarlante : don't go into search loop if mport specified + * Juan Jose Ciarlante : locking + * Steven Clarke : IP_MASQ_S_xx state design + * Juan Jose Ciarlante : IP_MASQ_S state implementation + * Juan Jose Ciarlante : xx_get() clears timer, _put() inserts it + * Juan Jose Ciarlante : create /proc/net/ip_masq/ + * Juan Jose Ciarlante : reworked checksums (save payload csum if possible) + * Juan Jose Ciarlante : added missing ip_fw_masquerade checksum + * Juan Jose Ciarlante : csum savings + * Juan Jose Ciarlante : added user-space tunnel creation/del, etc + * Juan Jose Ciarlante : (last) moved to ip_masq_user runtime module + * Juan Jose Ciarlante : user timeout handling again + * Juan Jose Ciarlante : make new modules support optional + * Juan Jose Ciarlante : u-space context => locks reworked + * Juan Jose Ciarlante : fixed stupid SMP locking bug + * Juan Jose Ciarlante : fixed "tap"ing in demasq path by copy-on-w + * Juan Jose Ciarlante : make masq_proto_doff() robust against fake sized/corrupted packets + * Kai Bankett : do not toss other IP protos in proto_doff() + * Dan Kegel : pointed correct NAT behavior for UDP streams + * Julian Anastasov : use daddr and dport as hash keys + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <asm/system.h> +#include <linux/stat.h> +#include <linux/proc_fs.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/inet.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/checksum.h> +#include <net/ip_masq.h> + +#ifdef CONFIG_IP_MASQUERADE_MOD +#include <net/ip_masq_mod.h> +#endif + +#include <linux/sysctl.h> +#include <linux/ip_fw.h> +#include <linux/ip_masq.h> + +int sysctl_ip_masq_debug = 0; + +/* + * Exported wrapper + */ +int ip_masq_get_debug_level(void) +{ + return sysctl_ip_masq_debug; +} + +struct ip_masq_hook *ip_masq_user_hook = NULL; + +/* + * Timeout table[state] + */ +/* static int masq_timeout_table[IP_MASQ_S_LAST+1] = { */ +static struct ip_masq_timeout_table masq_timeout_table = { + ATOMIC_INIT(0), /* refcnt */ + 0, /* scale */ + { + 30*60*HZ, /* IP_MASQ_S_NONE, */ + 15*60*HZ, /* IP_MASQ_S_ESTABLISHED, */ + 2*60*HZ, /* IP_MASQ_S_SYN_SENT, */ + 1*60*HZ, /* IP_MASQ_S_SYN_RECV, */ + 2*60*HZ, /* IP_MASQ_S_FIN_WAIT, */ + 2*60*HZ, /* IP_MASQ_S_TIME_WAIT, */ + 10*HZ, /* IP_MASQ_S_CLOSE, */ + 60*HZ, /* IP_MASQ_S_CLOSE_WAIT, */ + 30*HZ, /* IP_MASQ_S_LAST_ACK, */ 
+ 2*60*HZ, /* IP_MASQ_S_LISTEN, */ + 5*60*HZ, /* IP_MASQ_S_UDP, */ + 1*60*HZ, /* IP_MASQ_S_ICMP, */ + 2*HZ,/* IP_MASQ_S_LAST */ + }, /* timeout */ +}; + +#define MASQUERADE_EXPIRE_RETRY masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT] + +static const char * state_name_table[IP_MASQ_S_LAST+1] = { + "NONE", /* IP_MASQ_S_NONE, */ + "ESTABLISHED", /* IP_MASQ_S_ESTABLISHED, */ + "SYN_SENT", /* IP_MASQ_S_SYN_SENT, */ + "SYN_RECV", /* IP_MASQ_S_SYN_RECV, */ + "FIN_WAIT", /* IP_MASQ_S_FIN_WAIT, */ + "TIME_WAIT", /* IP_MASQ_S_TIME_WAIT, */ + "CLOSE", /* IP_MASQ_S_CLOSE, */ + "CLOSE_WAIT", /* IP_MASQ_S_CLOSE_WAIT, */ + "LAST_ACK", /* IP_MASQ_S_LAST_ACK, */ + "LISTEN", /* IP_MASQ_S_LISTEN, */ + "UDP", /* IP_MASQ_S_UDP, */ + "ICMP", /* IP_MASQ_S_ICMP, */ + "BUG!", /* IP_MASQ_S_LAST */ +}; + +#define mNO IP_MASQ_S_NONE +#define mES IP_MASQ_S_ESTABLISHED +#define mSS IP_MASQ_S_SYN_SENT +#define mSR IP_MASQ_S_SYN_RECV +#define mFW IP_MASQ_S_FIN_WAIT +#define mTW IP_MASQ_S_TIME_WAIT +#define mCL IP_MASQ_S_CLOSE +#define mCW IP_MASQ_S_CLOSE_WAIT +#define mLA IP_MASQ_S_LAST_ACK +#define mLI IP_MASQ_S_LISTEN + +struct masq_tcp_states_t { + int next_state[IP_MASQ_S_LAST]; /* should be _LAST_TCP */ +}; + +const char * ip_masq_state_name(int state) +{ + if (state >= IP_MASQ_S_LAST) + return "ERR!"; + return state_name_table[state]; +} + +struct masq_tcp_states_t masq_tcp_states [] = { +/* INPUT */ +/* mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI */ +/*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR }}, +/*fin*/ {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI }}, +/*ack*/ {{mCL, mES, mSS, mSR, mFW, mTW, mCL, mCW, mCL, mLI }}, +/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI }}, + +/* OUTPUT */ +/* mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI */ +/*syn*/ {{mSS, mES, mSS, mES, mSS, mSS, mSS, mSS, mSS, mLI }}, +/*fin*/ {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI }}, +/*ack*/ {{mES, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mES }}, +/*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL }}, +}; + +static __inline__ int masq_tcp_state_idx(struct tcphdr *th, int output) +{ + /* + * [0-3]: input states, [4-7]: output. + */ + if (output) + output=4; + + if (th->rst) + return output+3; + if (th->syn) + return output+0; + if (th->fin) + return output+1; + if (th->ack) + return output+2; + return -1; +} + + + +static int masq_set_state_timeout(struct ip_masq *ms, int state) +{ + struct ip_masq_timeout_table *mstim = ms->timeout_table; + int scale; + + /* + * Use default timeout table if no specific for this entry + */ + if (!mstim) + mstim = &masq_timeout_table; + + ms->timeout = mstim->timeout[ms->state=state]; + scale = mstim->scale; + + if (scale<0) + ms->timeout >>= -scale; + else if (scale > 0) + ms->timeout <<= scale; + + return state; +} + +static int masq_tcp_state(struct ip_masq *ms, int output, struct tcphdr *th) +{ + int state_idx; + int new_state = IP_MASQ_S_CLOSE; + + if ((state_idx = masq_tcp_state_idx(th, output)) < 0) { + IP_MASQ_DEBUG(1, "masq_state_idx(%d)=%d!!!\n", + output, state_idx); + goto tcp_state_out; + } + + new_state = masq_tcp_states[state_idx].next_state[ms->state]; + +tcp_state_out: + if (new_state!=ms->state) + IP_MASQ_DEBUG(1, "%s %s [%c%c%c%c] %08lX:%04X-%08lX:%04X state: %s->%s\n", + masq_proto_name(ms->protocol), + output? "output" : "input ", + th->syn? 'S' : '.', + th->fin? 'F' : '.', + th->ack? 'A' : '.', + th->rst? 
'R' : '.', + ntohl(ms->saddr), ntohs(ms->sport), + ntohl(ms->daddr), ntohs(ms->dport), + ip_masq_state_name(ms->state), + ip_masq_state_name(new_state)); + return masq_set_state_timeout(ms, new_state); +} + + +/* + * Handle state transitions + */ +static int masq_set_state(struct ip_masq *ms, int output, struct iphdr *iph, void *tp) +{ + switch (iph->protocol) { + case IPPROTO_ICMP: + return masq_set_state_timeout(ms, IP_MASQ_S_ICMP); + case IPPROTO_UDP: + return masq_set_state_timeout(ms, IP_MASQ_S_UDP); + case IPPROTO_TCP: + return masq_tcp_state(ms, output, tp); + } + return -1; +} + +/* + * Set LISTEN timeout. (ip_masq_put will setup timer) + */ +int ip_masq_listen(struct ip_masq *ms) +{ + masq_set_state_timeout(ms, IP_MASQ_S_LISTEN); + return ms->timeout; +} + +/* + * Dynamic address rewriting + */ +extern int sysctl_ip_dynaddr; + +/* + * Lookup lock + */ +rwlock_t __ip_masq_lock = RW_LOCK_UNLOCKED; + +/* + * Implement IP packet masquerading + */ + +/* + * Converts an ICMP reply code into the equivalent request code + */ +static __inline__ const __u8 icmp_type_request(__u8 type) +{ + switch (type) + { + case ICMP_ECHOREPLY: return ICMP_ECHO; break; + case ICMP_TIMESTAMPREPLY: return ICMP_TIMESTAMP; break; + case ICMP_INFO_REPLY: return ICMP_INFO_REQUEST; break; + case ICMP_ADDRESSREPLY: return ICMP_ADDRESS; break; + default: return (255); break; + } +} + +/* + * Helper macros - attempt to make code clearer! + */ + +/* ID used in ICMP lookups */ +#define icmp_id(icmph) ((icmph->un).echo.id) +/* (port) hash value using in ICMP lookups for requests */ +#define icmp_hv_req(icmph) ((__u16)(icmph->code+(__u16)(icmph->type<<8))) +/* (port) hash value using in ICMP lookups for replies */ +#define icmp_hv_rep(icmph) ((__u16)(icmph->code+(__u16)(icmp_type_request(icmph->type)<<8))) + +/* + * Last masq_port number in use. + * Will cycle in MASQ_PORT boundaries. + */ +static __u16 masq_port = PORT_MASQ_BEGIN; +static spinlock_t masq_port_lock = SPIN_LOCK_UNLOCKED; + +/* + * free ports counters (UDP & TCP) + * + * Their value is _less_ or _equal_ to actual free ports: + * same masq port, diff masq addr (firewall iface address) allocated + * entries are accounted but their actually don't eat a more than 1 port. + * + * Greater values could lower MASQ_EXPIRATION setting as a way to + * manage 'masq_entries resource'. + * + * By default we will reuse masq.port iff (output) connection + * (5-upla) if not duplicated. + * This may break midentd and others ... + */ + +#ifdef CONFIG_IP_MASQ_NREUSE +#define PORT_MASQ_MUL 1 +#else +#define PORT_MASQ_MUL 10 +#endif + +/* + * At the moment, hardcore in sync with masq_proto_num + */ +atomic_t ip_masq_free_ports[3] = { + ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* UDP */ + ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* TCP */ + ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* ICMP */ +}; + +/* + * Counts entries that have been requested with specific mport. + * Used for incoming packets to "relax" input rule (port in MASQ range). 
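+ *
+ *	Illustration (hypothetical names, optional masq-mod hook ignored):
+ *	the input test in ip_fw_demasquerade() below effectively computes
+ *
+ *		in_range = ntohs(dport) >= PORT_MASQ_BEGIN &&
+ *			   ntohs(dport) <= PORT_MASQ_END;
+ *		accept	 = in_range || atomic_read(&mport_count) != 0;
+ *
+ *	so packets outside the masq port range are still considered for
+ *	demasquerading while at least one entry created with an explicit
+ *	mport exists.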
+ */ +atomic_t mport_count = ATOMIC_INIT(0); + +EXPORT_SYMBOL(ip_masq_get_debug_level); +EXPORT_SYMBOL(ip_masq_new); +EXPORT_SYMBOL(ip_masq_listen); +EXPORT_SYMBOL(ip_masq_free_ports); +EXPORT_SYMBOL(ip_masq_out_get); +EXPORT_SYMBOL(ip_masq_in_get); +EXPORT_SYMBOL(ip_masq_put); +EXPORT_SYMBOL(ip_masq_control_add); +EXPORT_SYMBOL(ip_masq_control_del); +EXPORT_SYMBOL(ip_masq_control_get); +EXPORT_SYMBOL(ip_masq_user_hook); +EXPORT_SYMBOL(ip_masq_state_name); +EXPORT_SYMBOL(ip_masq_select_addr); +EXPORT_SYMBOL(__ip_masq_lock); +EXPORT_SYMBOL(ip_masq_m_table); +EXPORT_SYMBOL(ip_masq_s_table); +EXPORT_SYMBOL(ip_masq_d_table); + +/* + * 3 ip_masq hash double linked tables: + * 2 for input m{addr,port} and output s{addr,port} pkts lookups. + * 1 for extra modules support (daddr) + */ + +#define IP_MASQ_NTABLES 3 + +struct list_head ip_masq_m_table[IP_MASQ_TAB_SIZE]; +struct list_head ip_masq_s_table[IP_MASQ_TAB_SIZE]; +struct list_head ip_masq_d_table[IP_MASQ_TAB_SIZE]; + +/* + * timeouts + */ + +#if 000 /* FIXED timeout handling */ +static struct ip_fw_masq ip_masq_dummy = { + MASQUERADE_EXPIRE_TCP, + MASQUERADE_EXPIRE_TCP_FIN, + MASQUERADE_EXPIRE_UDP +}; + +EXPORT_SYMBOL(ip_masq_expire); +struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy; +#endif + +/* + * These flags enable non-strict d{addr,port} checks + * Given that both (in/out) lookup tables are hashed + * by m{addr,port} and s{addr,port} this is quite easy + */ + +#define MASQ_DADDR_PASS (IP_MASQ_F_NO_DADDR|IP_MASQ_F_DLOOSE) +#define MASQ_DPORT_PASS (IP_MASQ_F_NO_DPORT|IP_MASQ_F_DLOOSE) + +/* + * By default enable dest loose semantics + */ +#define CONFIG_IP_MASQ_LOOSE_DEFAULT 1 + + +/* + * Set masq expiration (deletion) and adds timer, + * if timeout==0 cancel expiration. + * Warning: it does not check/delete previous timer! + */ + +static void __ip_masq_set_expire(struct ip_masq *ms, unsigned long tout) +{ + if (tout) { + ms->timer.expires = jiffies+tout; + add_timer(&ms->timer); + } else { + del_timer(&ms->timer); + } +} + + +/* + * Returns hash value + */ + +static __inline__ unsigned +ip_masq_hash_key(unsigned proto, __u32 addr, __u16 port) +{ + return (proto^ntohl(addr)^ntohs(port)) & (IP_MASQ_TAB_SIZE-1); +} + +/* + * Hashes ip_masq by its proto,addrs,ports. + * should be called with locked tables. + * returns bool success. 
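+ *
+ *	A minimal sketch of the rehash pattern used later in this file
+ *	(see ip_fw_masquerade); example_rehash() is a hypothetical helper
+ *	and is guarded out with #if 0, so it is never compiled:
+ */
+
+#if 0
+static int example_rehash(struct ip_masq *ms, __u32 new_maddr)
+{
+	int hashed;
+
+	write_lock(&__ip_masq_lock);	/* caller provides the table lock */
+	ip_masq_unhash(ms);		/* drop the old hash position...  */
+	ms->maddr = new_maddr;		/* ...change the key...           */
+	hashed = ip_masq_hash(ms);	/* ...and re-insert the entry     */
+	write_unlock(&__ip_masq_lock);
+	return hashed;
+}
+#endif
+
+/*
+ *	ip_masq_hash() itself follows; caller must hold the table lock.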
+ */ + +static int ip_masq_hash(struct ip_masq *ms) +{ + unsigned hash; + + if (ms->flags & IP_MASQ_F_HASHED) { + IP_MASQ_ERR( "ip_masq_hash(): request for already hashed, called from %p\n", + __builtin_return_address(0)); + return 0; + } + atomic_add(IP_MASQ_NTABLES, &ms->refcnt); + + if ((ms->flags & (MASQ_DADDR_PASS | MASQ_DPORT_PASS | + IP_MASQ_F_SIMPLE_HASH)) == 0) + /* + * Hash by proto,m{addr,port},d{addr,port} + */ + hash = ip_masq_hash_key(ms->protocol, + ms->maddr^ms->daddr, ms->mport^ms->dport); + else + /* + * Hash by proto,m{addr,port} + */ + hash = ip_masq_hash_key(ms->protocol, ms->maddr, ms->mport); + + list_add(&ms->m_list, &ip_masq_m_table[hash]); + + if ((ms->flags & (MASQ_DADDR_PASS | MASQ_DPORT_PASS | + IP_MASQ_F_NO_SADDR | IP_MASQ_F_NO_SPORT | + IP_MASQ_F_SIMPLE_HASH)) == 0) + /* + * Hash by proto,s{addr,port},d{addr,port} + */ + hash = ip_masq_hash_key(ms->protocol, + ms->saddr^ms->daddr, ms->sport^ms->dport); + else + /* + * Hash by proto,s{addr,port} + */ + hash = ip_masq_hash_key(ms->protocol, ms->saddr, ms->sport); + + list_add(&ms->s_list, &ip_masq_s_table[hash]); + + /* + * Hash by proto,d{addr,port} + */ + hash = ip_masq_hash_key(ms->protocol, ms->daddr, ms->dport); + list_add(&ms->d_list, &ip_masq_d_table[hash]); + + + ms->flags |= IP_MASQ_F_HASHED; + return 1; +} + +/* + * UNhashes ip_masq from ip_masq_[ms]_tables. + * should be called with locked tables. + * returns bool success. + */ + +static int ip_masq_unhash(struct ip_masq *ms) +{ + if (!(ms->flags & IP_MASQ_F_HASHED)) { + IP_MASQ_ERR( "ip_masq_unhash(): request for unhash flagged, called from %p\n", + __builtin_return_address(0)); + return 0; + } + list_del(&ms->m_list); + list_del(&ms->s_list); + list_del(&ms->d_list); + + atomic_sub(IP_MASQ_NTABLES, &ms->refcnt); + + ms->flags &= ~IP_MASQ_F_HASHED; + return 1; +} + +/* + * Returns ip_masq associated with supplied parameters, either + * broken out of the ip/tcp headers or directly supplied for those + * pathological protocols with address/port in the data stream + * (ftp, irc). addresses and ports are in network order. + * called for pkts coming from OUTside-to-INside the firewall. + * + * s_addr, s_port: pkt source address (foreign host) + * d_addr, d_port: pkt dest address (firewall) + * + * NB. Cannot check destination address, just for the incoming port. + * reason: archie.doc.ac.uk has 6 interfaces, you send to + * phoenix and get a reply from any other interface(==dst)! 
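+ *
+ *	For a TCP reply arriving from the remote peer at the firewall's
+ *	masqueraded port the lookup reads (illustrative only, header
+ *	pointers assumed):
+ *
+ *		ms = __ip_masq_in_get(IPPROTO_TCP,
+ *				      iph->saddr, th->source,	-- foreign host
+ *				      iph->daddr, th->dest);	-- maddr:mport
+ *
+ *	which is roughly what ip_masq_in_get_iph() expands to for TCP/UDP.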
+ * + * [Only for UDP] - AC + * + * Caller must lock tables + */ + +static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_masq *ms = NULL; + struct list_head *l,*e; + + hash = ip_masq_hash_key(protocol, d_addr^s_addr, d_port^s_port); + + l = &ip_masq_m_table[hash]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, m_list); + if (s_port==ms->dport && s_addr==ms->daddr && + d_port==ms->mport && protocol==ms->protocol && + d_addr==ms->maddr && + ((ms->flags & (MASQ_DADDR_PASS | MASQ_DPORT_PASS)) == 0) + ) { + IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + atomic_inc(&ms->refcnt); + goto out; + } + } + + hash = ip_masq_hash_key(protocol, d_addr, d_port); + + l = &ip_masq_m_table[hash]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, m_list); + if (protocol==ms->protocol && + (d_addr==ms->maddr && d_port==ms->mport) && + (s_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) && + (s_port==ms->dport || ms->flags & MASQ_DPORT_PASS) + ) { + IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + atomic_inc(&ms->refcnt); + goto out; + } + } + IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX fail\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + + ms = NULL; +out: + return ms; +} + +/* + * Returns ip_masq associated with supplied parameters, either + * broken out of the ip/tcp headers or directly supplied for those + * pathological protocols with address/port in the data stream + * (ftp, irc). addresses and ports are in network order. + * called for pkts coming from inside-to-OUTside the firewall. + * + * Normally we know the source address and port but for some protocols + * (e.g. ftp PASV) we do not know the source port initially. Alas the + * hash is keyed on source port so if the first lookup fails then try again + * with a zero port, this time only looking at entries marked "no source + * port". 
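+ *
+ *	Lookup order implemented below (summary; flag names as used in
+ *	this file):
+ *
+ *		1. exact tuple:	    hash(proto, saddr^daddr, sport^dport),
+ *				    only entries with no PASS/NO_S* flags
+ *		2. loose dest:	    hash(proto, saddr, sport), daddr/dport
+ *				    compared unless MASQ_DADDR_PASS or
+ *				    MASQ_DPORT_PASS is set
+ *		3. no source port:  hash(proto, saddr, 0), entries flagged
+ *				    IP_MASQ_F_NO_SPORT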
+ * + * Caller must lock tables + */ + +static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_masq *ms = NULL; + struct list_head *l,*e; + + /* + * Check for "full" addressed entries + */ + hash = ip_masq_hash_key(protocol, s_addr^d_addr, s_port^d_port); + + l = &ip_masq_s_table[hash]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, s_list); + if (d_addr==ms->daddr && d_port==ms->dport && + s_addr==ms->saddr && s_port==ms->sport && + protocol==ms->protocol && + ((ms->flags & (MASQ_DADDR_PASS | MASQ_DPORT_PASS | + IP_MASQ_F_NO_SADDR | IP_MASQ_F_NO_SPORT)) == 0) + ) { + IP_MASQ_DEBUG(2, "lk/out0 %d %08X:%04hX->%08X:%04hX OK\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + + atomic_inc(&ms->refcnt); + goto out; + } + + } + + hash = ip_masq_hash_key(protocol, s_addr, s_port); + + l = &ip_masq_s_table[hash]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, s_list); + if (protocol == ms->protocol && + s_addr == ms->saddr && s_port == ms->sport && + (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) && + (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS) + ) { + IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX OK\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + + atomic_inc(&ms->refcnt); + goto out; + } + + } + + /* + * Check for NO_SPORT entries + */ + hash = ip_masq_hash_key(protocol, s_addr, 0); + l = &ip_masq_s_table[hash]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, s_list); + if (ms->flags & IP_MASQ_F_NO_SPORT && + protocol == ms->protocol && + s_addr == ms->saddr && + (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) && + (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS) + ) { + IP_MASQ_DEBUG(2, "lk/out2 %d %08X:%04hX->%08X:%04hX OK\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + + atomic_inc(&ms->refcnt); + goto out; + } + } + IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX fail\n", + protocol, + s_addr, + s_port, + d_addr, + d_port); + + ms = NULL; +out: + return ms; +} + +#ifdef CONFIG_IP_MASQ_NREUSE +/* + * Returns ip_masq for given proto,m_addr,m_port. + * called by allocation routine to find an unused m_port. 
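+ *
+ *	ip_masq_new() probes candidate ports roughly as (locking and
+ *	refcounting elided, illustrative only):
+ *
+ *		do {
+ *			mport = get_next_mport();
+ *		} while (__ip_masq_getbym(proto, maddr, mport) != NULL);
+ *
+ *	Note that, as written, this CONFIG_IP_MASQ_NREUSE variant still
+ *	walks the older ip_masq_m_tab/m_link open hash rather than the
+ *	ip_masq_m_table list heads used by the rest of this file.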
+ * + * Caller must lock tables + */ + +static struct ip_masq * __ip_masq_getbym(int protocol, __u32 m_addr, __u16 m_port) +{ + unsigned hash; + struct ip_masq *ms = NULL; + + hash = ip_masq_hash_key(protocol, m_addr, m_port); + + for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) { + if ( protocol==ms->protocol && + (m_addr==ms->maddr && m_port==ms->mport)) { + atomic_inc(&ms->refcnt); + goto out; + } + } + +out: + return ms; +} +#endif + +struct ip_masq * ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + struct ip_masq *ms; + + read_lock(&__ip_masq_lock); + ms = __ip_masq_out_get(protocol, s_addr, s_port, d_addr, d_port); + read_unlock(&__ip_masq_lock); + + if (ms) + __ip_masq_set_expire(ms, 0); + return ms; +} + +struct ip_masq * ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + struct ip_masq *ms; + + read_lock(&__ip_masq_lock); + ms = __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port); + read_unlock(&__ip_masq_lock); + + if (ms) + __ip_masq_set_expire(ms, 0); + return ms; +} + +static __inline__ void __ip_masq_put(struct ip_masq *ms) +{ + atomic_dec(&ms->refcnt); +} + +void ip_masq_put(struct ip_masq *ms) +{ + /* + * Decrement refcnt + */ + __ip_masq_put(ms); + + /* + * if refcnt==IP_MASQ_NTABLES + */ + if (atomic_read(&ms->refcnt)==IP_MASQ_NTABLES) { + __ip_masq_set_expire(ms, ms->timeout); + } else { + IP_MASQ_DEBUG(0, "did not set timer with refcnt=%d, called from %p\n", + atomic_read(&ms->refcnt), + __builtin_return_address(0)); + } +} + +static void masq_expire(unsigned long data) +{ + struct ip_masq *ms = (struct ip_masq *)data; + ms->timeout = MASQUERADE_EXPIRE_RETRY; + + /* + * hey, I'm using it + */ + atomic_inc(&ms->refcnt); + + IP_MASQ_DEBUG(1, "Masqueraded %s %08lX:%04X expired\n", + masq_proto_name(ms->protocol), + ntohl(ms->saddr),ntohs(ms->sport)); + + write_lock(&__ip_masq_lock); + +#if 0000 + /* + * Already locked, do bounce ... + */ + if (ip_masq_nlocks(&__ip_masq_lock) != 1) { + goto masq_expire_later; + } + +#endif + /* + * do I control anybody? + */ + if (atomic_read(&ms->n_control)) + goto masq_expire_later; + + /* + * does anybody controls me? + */ + + if (ms->control) + ip_masq_control_del(ms); + + if (ip_masq_unhash(ms)) { + if (ms->flags&IP_MASQ_F_MPORT) { + atomic_dec(&mport_count); + } else { + atomic_inc(ip_masq_free_ports + masq_proto_num(ms->protocol)); + } + ip_masq_unbind_app(ms); + } + + /* + * refcnt==1 implies I'm the only one referrer + */ + if (atomic_read(&ms->refcnt) == 1) { + kfree_s(ms,sizeof(*ms)); + MOD_DEC_USE_COUNT; + goto masq_expire_out; + } + +masq_expire_later: + IP_MASQ_DEBUG(0, "masq_expire delayed: %s %08lX:%04X->%08lX:%04X masq.refcnt-1=%d masq.n_control=%d\n", + masq_proto_name(ms->protocol), + ntohl(ms->saddr), ntohs(ms->sport), + ntohl(ms->daddr), ntohs(ms->dport), + atomic_read(&ms->refcnt)-1, + atomic_read(&ms->n_control)); + + ip_masq_put(ms); + +masq_expire_out: + write_unlock(&__ip_masq_lock); +} + +static __u16 get_next_mport(void) +{ + __u16 mport; + + spin_lock_irq(&masq_port_lock); + /* + * Try the next available port number + */ + mport = htons(masq_port++); + if (masq_port==PORT_MASQ_END) masq_port = PORT_MASQ_BEGIN; + + spin_unlock_irq(&masq_port_lock); + return mport; +} + +/* + * Create a new masquerade list entry, also allocate an + * unused mport, keeping the portnumber between the + * given boundaries MASQ_BEGIN and MASQ_END. 
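+ *
+ *	Typical call from the output path (see ip_fw_masquerade below):
+ *
+ *		ms = ip_masq_new(iph->protocol,
+ *				 maddr, 0,			-- mport 0: allocate one
+ *				 iph->saddr, h.portp[0],	-- original source
+ *				 iph->daddr, h.portp[1],	-- destination
+ *				 0);				-- no flags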
+ * + * Be careful, it can be called from u-space + */ + +struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags) +{ + struct ip_masq *ms, *mst; + int ports_tried; + atomic_t *free_ports_p = NULL; + static int n_fails = 0; + int prio; + + + if (masq_proto_num(proto)!=-1 && mport == 0) { + free_ports_p = ip_masq_free_ports + masq_proto_num(proto); + + if (atomic_read(free_ports_p) == 0) { + if (++n_fails < 5) + IP_MASQ_ERR( "ip_masq_new(proto=%s): no free ports.\n", + masq_proto_name(proto)); + return NULL; + } + } + + prio = (mflags&IP_MASQ_F_USER) ? GFP_KERNEL : GFP_ATOMIC; + + ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), prio); + if (ms == NULL) { + if (++n_fails < 5) + IP_MASQ_ERR("ip_masq_new(proto=%s): no memory available.\n", + masq_proto_name(proto)); + return NULL; + } + MOD_INC_USE_COUNT; + memset(ms, 0, sizeof(*ms)); + INIT_LIST_HEAD(&ms->s_list); + INIT_LIST_HEAD(&ms->m_list); + INIT_LIST_HEAD(&ms->d_list); + init_timer(&ms->timer); + ms->timer.data = (unsigned long)ms; + ms->timer.function = masq_expire; + ms->protocol = proto; + ms->saddr = saddr; + ms->sport = sport; + ms->daddr = daddr; + ms->dport = dport; + ms->flags = mflags; + ms->app_data = NULL; + ms->control = NULL; + + atomic_set(&ms->n_control,0); + atomic_set(&ms->refcnt,0); + + if (proto == IPPROTO_UDP && !mport) +#ifdef CONFIG_IP_MASQ_LOOSE_DEFAULT + /* + * Flag this tunnel as "dest loose" + * + */ + ms->flags |= IP_MASQ_F_DLOOSE; +#else + ms->flags |= IP_MASQ_F_NO_DADDR; +#endif + + + /* get masq address from rif */ + ms->maddr = maddr; + + /* + * This flag will allow masq. addr (ms->maddr) + * to follow forwarding interface address. + */ + ms->flags |= IP_MASQ_F_NO_REPLY; + + /* + * We want a specific mport. Be careful. + */ + if (masq_proto_num(proto) == -1 || mport) { + ms->mport = mport; + + /* + * Check 5-upla uniqueness + */ + if (mflags & IP_MASQ_F_USER) + write_lock_bh(&__ip_masq_lock); + else + write_lock(&__ip_masq_lock); + + mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport); + if (mst==NULL) { + ms->flags |= IP_MASQ_F_MPORT; + + atomic_inc(&mport_count); + ip_masq_hash(ms); + + if (mflags & IP_MASQ_F_USER) + write_unlock_bh(&__ip_masq_lock); + else + write_unlock(&__ip_masq_lock); + + ip_masq_bind_app(ms); + atomic_inc(&ms->refcnt); + masq_set_state_timeout(ms, IP_MASQ_S_NONE); + return ms; + } + if (mflags & IP_MASQ_F_USER) + write_unlock_bh(&__ip_masq_lock); + else + write_unlock(&__ip_masq_lock); + + __ip_masq_put(mst); + + IP_MASQ_ERR( "Already used connection: %s, %d.%d.%d.%d:%d => %d.%d.%d.%d:%d, called from %p\n", + masq_proto_name(proto), + NIPQUAD(maddr), ntohs(mport), + NIPQUAD(daddr), ntohs(dport), + __builtin_return_address(0)); + + + goto mport_nono; + } + + + for (ports_tried = 0; + (atomic_read(free_ports_p) && (ports_tried <= (PORT_MASQ_END - PORT_MASQ_BEGIN))); + ports_tried++){ + + mport = ms->mport = get_next_mport(); + /* + * lookup to find out if this connection is used. 
+ */ + + if (mflags & IP_MASQ_F_USER) + write_lock_bh(&__ip_masq_lock); + else + write_lock(&__ip_masq_lock); + +#ifdef CONFIG_IP_MASQ_NREUSE + mst = __ip_masq_getbym(proto, maddr, mport); +#else + mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport); +#endif + if (mst == NULL) { + + if (atomic_read(free_ports_p) == 0) { + if (mflags & IP_MASQ_F_USER) + write_unlock_bh(&__ip_masq_lock); + else + write_unlock(&__ip_masq_lock); + + break; + } + atomic_dec(free_ports_p); + ip_masq_hash(ms); + + if (mflags & IP_MASQ_F_USER) + write_unlock_bh(&__ip_masq_lock); + else + write_unlock(&__ip_masq_lock); + + ip_masq_bind_app(ms); + n_fails = 0; + atomic_inc(&ms->refcnt); + masq_set_state_timeout(ms, IP_MASQ_S_NONE); + return ms; + } + if (mflags & IP_MASQ_F_USER) + write_unlock_bh(&__ip_masq_lock); + else + write_unlock(&__ip_masq_lock); + + __ip_masq_put(mst); + } + + if (++n_fails < 5) + IP_MASQ_ERR( "ip_masq_new(proto=%s): could not get free masq entry (free=%d).\n", + masq_proto_name(ms->protocol), + atomic_read(free_ports_p)); +mport_nono: + kfree_s(ms, sizeof(*ms)); + + MOD_DEC_USE_COUNT; + return NULL; +} + +/* + * Get transport protocol data offset, check against size + * return: + * 0 if other IP proto + * -1 if error + */ +static __inline__ int proto_doff(unsigned proto, char *th, unsigned size) +{ + int ret = -1; + switch (proto) { + case IPPROTO_ICMP: + if (size >= sizeof(struct icmphdr)) + ret = sizeof(struct icmphdr); + break; + case IPPROTO_UDP: + if (size >= sizeof(struct udphdr)) + ret = sizeof(struct udphdr); + break; + case IPPROTO_TCP: + /* + * Is this case, this check _also_ avoids + * touching an invalid pointer if + * size is invalid + */ + if (size >= sizeof(struct tcphdr)) { + ret = ((struct tcphdr*)th)->doff << 2; + if (ret > size) { + ret = -1 ; + } + } + + break; + default: + /* Other proto: nothing to say, by now :) */ + ret = 0; + } + if (ret < 0) + IP_MASQ_DEBUG(0, "mess proto_doff for proto=%d, size =%d\n", + proto, size); + return ret; +} + +int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + union ip_masq_tphdr h; + struct ip_masq *ms; + int size; + + /* + * doff holds transport protocol data offset + * csum holds its checksum + * csum_ok says if csum is valid + */ + int doff = 0; + int csum = 0; + int csum_ok = 0; + + /* + * We can only masquerade protocols with ports... and hack some ICMPs + */ + + h.raw = (char*) iph + iph->ihl * 4; + size = ntohs(iph->tot_len) - (iph->ihl * 4); + + + doff = proto_doff(iph->protocol, h.raw, size); + if (doff <= 0) { + /* + * Output path: do not pass other IP protos nor + * invalid packets. 
+ */ + return -1; + } + + switch (iph->protocol) { + case IPPROTO_ICMP: + return(ip_fw_masq_icmp(skb_p, maddr)); + case IPPROTO_UDP: + if (h.uh->check == 0) + /* No UDP checksum */ + break; + case IPPROTO_TCP: + /* Make sure packet is in the masq range */ + IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n", + masq_proto_name(iph->protocol), + size); + +#ifdef CONFIG_IP_MASQ_DEBUG + if (ip_masq_get_debug_level() > 3) { + skb->ip_summed = CHECKSUM_NONE; + } +#endif + /* Check that the checksum is OK */ + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + { + csum = csum_partial(h.raw + doff, size - doff, 0); + IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n", + masq_proto_name(iph->protocol), + csum); + + skb->csum = csum_partial(h.raw , doff, csum); + } + case CHECKSUM_HW: + if (csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, skb->csum)) + { + IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + size); + return -1; + } + default: + /* CHECKSUM_UNNECESSARY */ + } + break; + default: + return -1; + } + /* + * Now hunt the list to see if we have an old entry + */ + + /* h.raw = (char*) iph + iph->ihl * 4; */ + + IP_MASQ_DEBUG(2, "Outgoing %s %08lX:%04X -> %08lX:%04X\n", + masq_proto_name(iph->protocol), + ntohl(iph->saddr), ntohs(h.portp[0]), + ntohl(iph->daddr), ntohs(h.portp[1])); + + ms = ip_masq_out_get_iph(iph); + if (ms!=NULL) { + + /* + * If sysctl !=0 and no pkt has been received yet + * in this tunnel and routing iface address has changed... + * "You are welcome, diald". + */ + if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { + + if (sysctl_ip_dynaddr > 1) { + IP_MASQ_INFO( "ip_fw_masquerade(): change masq.addr from %d.%d.%d.%d to %d.%d.%d.%d\n", + NIPQUAD(ms->maddr),NIPQUAD(maddr)); + } + + write_lock(&__ip_masq_lock); + + ip_masq_unhash(ms); + ms->maddr = maddr; + ip_masq_hash(ms); + + write_unlock(&__ip_masq_lock); + } + + /* + * Set sport if not defined yet (e.g. ftp PASV). Because + * masq entries are hashed on sport, unhash with old value + * and hash with new. + */ + + if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) { + + write_lock(&__ip_masq_lock); + + ip_masq_unhash(ms); + ms->flags &= ~IP_MASQ_F_NO_SPORT; + ms->sport = h.portp[0]; + ip_masq_hash(ms); /* hash on new sport */ + + write_unlock(&__ip_masq_lock); + + IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n", + ntohs(ms->sport)); + } + if (ms->flags & IP_MASQ_F_DLOOSE) { + /* + * update dest loose values + */ + ms->dport = h.portp[1]; + ms->daddr = iph->daddr; + } + } else { + /* + * Nope, not found, create a new entry for it + */ + +#ifdef CONFIG_IP_MASQUERADE_MOD + if (!(ms = ip_masq_mod_out_create(skb, iph, maddr))) +#endif + ms = ip_masq_new(iph->protocol, + maddr, 0, + iph->saddr, h.portp[0], + iph->daddr, h.portp[1], + 0); + if (ms == NULL) + return -1; + } + + /* + * Call module's output update hook + */ + +#ifdef CONFIG_IP_MASQUERADE_MOD + ip_masq_mod_out_update(skb, iph, ms); +#endif + + /* + * Change the fragments origin + */ + + size = skb->len - (h.raw - skb->nh.raw); + + /* + * Set iph addr and port from ip_masq obj. + */ + iph->saddr = ms->maddr; + h.portp[0] = ms->mport; + + /* + * Invalidate csum saving if tunnel has masq helper + */ + + if (ms->app) + csum_ok = 0; + + /* + * Attempt ip_masq_app call. + * will fix ip_masq and iph seq stuff + */ + if (ip_masq_app_pkt_out(ms, skb_p, maddr) != 0) + { + /* + * skb has possibly changed, update pointers. 
+ */ + skb = *skb_p; + iph = skb->nh.iph; + h.raw = (char*) iph + iph->ihl *4; + size = skb->len - (h.raw - skb->nh.raw); + /* doff should have not changed */ + } + + /* + * Adjust packet accordingly to protocol + */ + + /* + * Transport's payload partial csum + */ + + if (!csum_ok) { + csum = csum_partial(h.raw + doff, size - doff, 0); + } + skb->csum = csum; + + IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", + masq_proto_name(iph->protocol), + size, + csum); + + /* + * Protocol csum + */ + switch (iph->protocol) { + case IPPROTO_TCP: + h.th->check = 0; + h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h.raw , doff, csum)); + IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + h.th->check, + (char*) & (h.th->check) - (char*) h.raw); + + break; + case IPPROTO_UDP: + h.uh->check = 0; + h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h.raw , doff, csum)); + if (h.uh->check == 0) + h.uh->check = 0xFFFF; + IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + h.uh->check, + (char*) &(h.uh->check)- (char*) h.raw); + break; + } + ip_send_check(iph); + + IP_MASQ_DEBUG(2, "O-routed from %08lX:%04X with masq.addr %08lX\n", + ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr)); + + masq_set_state(ms, 1, iph, h.portp); + ip_masq_put(ms); + + return 0; + } + +/* + * Restore original addresses and ports in the original IP + * datagram if the failing packet has been [de]masqueraded. + * This is ugly in the extreme. We no longer have the original + * packet so we have to reconstruct it from the failing packet + * plus data in the masq tables. The resulting "original data" + * should be good enough to tell the sender which session to + * throttle. Relies on far too much knowledge of masq internals, + * there ought to be a better way - KAO 990303. + * + * Moved here from icmp.c - JJC. + * Already known: type == ICMP_DEST_UNREACH, IPSKB_MASQUERADED + * skb->nh.iph points to original header. + * + * Must try both OUT and IN tables; we could add a flag + * ala IPSKB_MASQUERADED to avoid 2nd tables lookup, but this is VERY + * unlike because routing makes mtu decision before reaching + * ip_fw_masquerade(). 
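+ *
+ *	In short (rewrites performed below):
+ *
+ *		"out" table hit: daddr/dport (internal host) is put back to
+ *				 maddr/mport, as the outside peer saw it;
+ *		"in" table hit:	 saddr/sport (masq address) is put back to
+ *				 the internal host's saddr/sport.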
+ * + */ +int ip_fw_unmasq_icmp(struct sk_buff *skb) { + struct ip_masq *ms; + struct iphdr *iph = skb->nh.iph; + __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + + /* + * Always called from _bh context: use read_[un]lock() + */ + + /* + * Peek "out" table, this packet has bounced: + * out->in(frag_needed!)->OUT[icmp] + * + * iph->daddr is IN host + * iph->saddr is OUT host + */ + read_lock(&__ip_masq_lock); + ms = __ip_masq_out_get(iph->protocol, + iph->daddr, portp[1], + iph->saddr, portp[0]); + read_unlock(&__ip_masq_lock); + if (ms) { + IP_MASQ_DEBUG(1, "Incoming frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n", + NIPQUAD(iph->daddr), NIPQUAD(ms->maddr)); + iph->daddr = ms->maddr; + portp[1] = ms->mport; + __ip_masq_put(ms); + return 1; + } + /* + * Peek "in" table + * in->out(frag_needed!)->IN[icmp] + * + * iph->daddr is OUT host + * iph->saddr is MASQ host + * + */ + read_lock(&__ip_masq_lock); + ms = __ip_masq_in_get(iph->protocol, + iph->daddr, portp[1], + iph->saddr, portp[0]); + read_unlock(&__ip_masq_lock); + if (ms) { + IP_MASQ_DEBUG(1, "Outgoing frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n", + NIPQUAD(iph->saddr), NIPQUAD(ms->saddr)); + iph->saddr = ms->saddr; + portp[0] = ms->sport; + __ip_masq_put(ms); + return 1; + } + return 0; + +} +/* + * Handle ICMP messages in forward direction. + * Find any that might be relevant, check against existing connections, + * forward to masqueraded host if relevant. + * Currently handles error types - unreachable, quench, ttl exceeded + */ + +int ip_fw_masq_icmp(struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + struct icmphdr *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); + struct iphdr *ciph; /* The ip header contained within the ICMP */ + __u16 *pptr; /* port numbers from TCP/UDP contained header */ + struct ip_masq *ms; + unsigned short len = ntohs(iph->tot_len) - (iph->ihl * 4); + + IP_MASQ_DEBUG(2, "Incoming forward ICMP (%d,%d) %lX -> %lX\n", + icmph->type, ntohs(icmp_id(icmph)), + ntohl(iph->saddr), ntohl(iph->daddr)); + +#ifdef CONFIG_IP_MASQUERADE_ICMP + if ((icmph->type == ICMP_ECHO ) || + (icmph->type == ICMP_TIMESTAMP ) || + (icmph->type == ICMP_INFO_REQUEST ) || + (icmph->type == ICMP_ADDRESS )) { + + IP_MASQ_DEBUG(2, "icmp request rcv %lX->%lX id %d type %d\n", + ntohl(iph->saddr), + ntohl(iph->daddr), + ntohs(icmp_id(icmph)), + icmph->type); + + ms = ip_masq_out_get(iph->protocol, + iph->saddr, + icmp_id(icmph), + iph->daddr, + icmp_hv_req(icmph)); + if (ms == NULL) { + ms = ip_masq_new(iph->protocol, + maddr, 0, + iph->saddr, icmp_id(icmph), + iph->daddr, icmp_hv_req(icmph), + 0); + if (ms == NULL) + return (-1); + IP_MASQ_DEBUG(1, "Created new icmp entry\n"); + } + /* Rewrite source address */ + + /* + * If sysctl !=0 and no pkt has been received yet + * in this tunnel and routing iface address has changed... + * "You are welcome, diald". 
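+		 *
+		 *	(Any non-zero sysctl_ip_dynaddr enables this rewrite;
+		 *	values greater than 1 also log the address change.
+		 *	The value is normally set via the ip_dynaddr sysctl.)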
+ */ + if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { + + if (sysctl_ip_dynaddr > 1) { + IP_MASQ_INFO( "ip_fw_masq_icmp(): change masq.addr %d.%d.%d.%d to %d.%d.%d.%d", + NIPQUAD(ms->maddr), NIPQUAD(maddr)); + } + + write_lock(&__ip_masq_lock); + + ip_masq_unhash(ms); + ms->maddr = maddr; + ip_masq_hash(ms); + + write_unlock(&__ip_masq_lock); + } + + iph->saddr = ms->maddr; + ip_send_check(iph); + /* Rewrite port (id) */ + (icmph->un).echo.id = ms->mport; + icmph->checksum = 0; + icmph->checksum = ip_compute_csum((unsigned char *)icmph, len); + + IP_MASQ_DEBUG(2, "icmp request rwt %lX->%lX id %d type %d\n", + ntohl(iph->saddr), + ntohl(iph->daddr), + ntohs(icmp_id(icmph)), + icmph->type); + + masq_set_state(ms, 1, iph, icmph); + ip_masq_put(ms); + + return 1; + } +#endif + + /* + * Work through seeing if this is for us. + * These checks are supposed to be in an order that + * means easy things are checked first to speed up + * processing.... however this means that some + * packets will manage to get a long way down this + * stack and then be rejected, but thats life + */ + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_SOURCE_QUENCH) && + (icmph->type != ICMP_TIME_EXCEEDED)) + return 0; + + /* Now find the contained IP header */ + ciph = (struct iphdr *) (icmph + 1); + +#ifdef CONFIG_IP_MASQUERADE_ICMP + if (ciph->protocol == IPPROTO_ICMP) { + /* + * This section handles ICMP errors for ICMP packets + */ + struct icmphdr *cicmph = (struct icmphdr *)((char *)ciph + + (ciph->ihl<<2)); + + + IP_MASQ_DEBUG(2, "fw icmp/icmp rcv %lX->%lX id %d type %d\n", + ntohl(ciph->saddr), + ntohl(ciph->daddr), + ntohs(icmp_id(cicmph)), + cicmph->type); + + read_lock(&__ip_masq_lock); + ms = __ip_masq_out_get(ciph->protocol, + ciph->daddr, + icmp_id(cicmph), + ciph->saddr, + icmp_hv_rep(cicmph)); + read_unlock(&__ip_masq_lock); + + if (ms == NULL) + return 0; + + /* Now we do real damage to this packet...! */ + /* First change the source IP address, and recalc checksum */ + iph->saddr = ms->maddr; + ip_send_check(iph); + + /* Now change the *dest* address in the contained IP */ + ciph->daddr = ms->maddr; + __ip_masq_put(ms); + + ip_send_check(ciph); + + /* Change the ID to the masqed one! */ + (cicmph->un).echo.id = ms->mport; + + /* And finally the ICMP checksum */ + icmph->checksum = 0; + icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); + + + IP_MASQ_DEBUG(2, "fw icmp/icmp rwt %lX->%lX id %d type %d\n", + ntohl(ciph->saddr), + ntohl(ciph->daddr), + ntohs(icmp_id(cicmph)), + cicmph->type); + + return 1; + } +#endif /* CONFIG_IP_MASQUERADE_ICMP */ + + /* We are only interested ICMPs generated from TCP or UDP packets */ + if ((ciph->protocol != IPPROTO_UDP) && (ciph->protocol != IPPROTO_TCP)) + return 0; + + /* + * Find the ports involved - this packet was + * incoming so the ports are right way round + * (but reversed relative to outer IP header!) + */ + pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); +#if 0 + if (ntohs(pptr[1]) < PORT_MASQ_BEGIN || + ntohs(pptr[1]) > PORT_MASQ_END) + return 0; +#endif + + /* Ensure the checksum is correct */ + if (ip_compute_csum((unsigned char *) icmph, len)) + { + /* Failed checksum! 
*/ + IP_MASQ_DEBUG(0, "forward ICMP: failed checksum from %d.%d.%d.%d!\n", + NIPQUAD(iph->saddr)); + return(-1); + } + + + IP_MASQ_DEBUG(2, "Handling forward ICMP for %08lX:%04X -> %08lX:%04X\n", + ntohl(ciph->saddr), ntohs(pptr[0]), + ntohl(ciph->daddr), ntohs(pptr[1])); + + +#if 0 + /* This is pretty much what __ip_masq_in_get_iph() does */ + ms = __ip_masq_in_get(ciph->protocol, ciph->saddr, pptr[0], ciph->daddr, pptr[1]); +#endif + read_lock(&__ip_masq_lock); + ms = __ip_masq_out_get(ciph->protocol, + ciph->daddr, + pptr[1], + ciph->saddr, + pptr[0]); + read_unlock(&__ip_masq_lock); + + if (ms == NULL) + return 0; + + /* Now we do real damage to this packet...! */ + /* First change the source IP address, and recalc checksum */ + iph->saddr = ms->maddr; + ip_send_check(iph); + + /* Now change the *dest* address in the contained IP */ + ciph->daddr = ms->maddr; + ip_send_check(ciph); + + /* the TCP/UDP dest port - cannot redo check */ + pptr[1] = ms->mport; + __ip_masq_put(ms); + + /* And finally the ICMP checksum */ + icmph->checksum = 0; + icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); + + + IP_MASQ_DEBUG(2, "Rewrote forward ICMP to %08lX:%04X -> %08lX:%04X\n", + ntohl(ciph->saddr), ntohs(pptr[0]), + ntohl(ciph->daddr), ntohs(pptr[1])); + + + return 1; +} + + +/* + * Own skb_cow() beast, tweaked for rewriting commonly + * used pointers in masq code + */ +static struct sk_buff * masq_skb_cow(struct sk_buff **skb_p, + struct iphdr **iph_p, unsigned char **t_p) { + struct sk_buff *skb=(*skb_p); + if (skb_cloned(skb)) { + skb = skb_copy(skb, GFP_ATOMIC); + if (skb) { + /* + * skb changed, update other pointers + */ + struct iphdr *iph = skb->nh.iph; + kfree_skb(*skb_p); + *skb_p = skb; + *iph_p = iph; + *t_p = (char*) iph + iph->ihl * 4; + } + } + return skb; +} + +/* + * Handle ICMP messages in reverse (demasquerade) direction. + * Find any that might be relevant, check against existing connections, + * forward to masqueraded host if relevant. 
+ * Currently handles error types - unreachable, quench, ttl exceeded + */ + +int ip_fw_demasq_icmp(struct sk_buff **skb_p) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + struct icmphdr *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); + struct iphdr *ciph; /* The ip header contained within the ICMP */ + __u16 *pptr; /* port numbers from TCP/UDP contained header */ + struct ip_masq *ms; + unsigned short len = ntohs(iph->tot_len) - (iph->ihl * 4); + + + IP_MASQ_DEBUG(2, "icmp in/rev (%d,%d) %lX -> %lX\n", + icmph->type, ntohs(icmp_id(icmph)), + ntohl(iph->saddr), ntohl(iph->daddr)); + + +#ifdef CONFIG_IP_MASQUERADE_ICMP + if ((icmph->type == ICMP_ECHOREPLY) || + (icmph->type == ICMP_TIMESTAMPREPLY) || + (icmph->type == ICMP_INFO_REPLY) || + (icmph->type == ICMP_ADDRESSREPLY)) { + + IP_MASQ_DEBUG(2, "icmp reply rcv %lX->%lX id %d type %d, req %d\n", + ntohl(iph->saddr), + ntohl(iph->daddr), + ntohs(icmp_id(icmph)), + icmph->type, + icmp_type_request(icmph->type)); + + ms = ip_masq_in_get(iph->protocol, + iph->saddr, + icmp_hv_rep(icmph), + iph->daddr, + icmp_id(icmph)); + if (ms == NULL) + return 0; + + /* + * got reply, so clear flag + */ + ms->flags &= ~IP_MASQ_F_NO_REPLY; + + if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { + ip_masq_put(ms); + return -1; + } + + /* Reset source address */ + iph->daddr = ms->saddr; + /* Redo IP header checksum */ + ip_send_check(iph); + /* Set ID to fake port number */ + (icmph->un).echo.id = ms->sport; + /* Reset ICMP checksum and set expiry */ + icmph->checksum=0; + icmph->checksum=ip_compute_csum((unsigned char *)icmph,len); + + + + IP_MASQ_DEBUG(2, "icmp reply rwt %lX->%lX id %d type %d\n", + ntohl(iph->saddr), + ntohl(iph->daddr), + ntohs(icmp_id(icmph)), + icmph->type); + + masq_set_state(ms, 0, iph, icmph); + ip_masq_put(ms); + + return 1; + } else { +#endif + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_SOURCE_QUENCH) && + (icmph->type != ICMP_TIME_EXCEEDED)) + return 0; +#ifdef CONFIG_IP_MASQUERADE_ICMP + } +#endif + /* + * If we get here we have an ICMP error of one of the above 3 types + * Now find the contained IP header + */ + + ciph = (struct iphdr *) (icmph + 1); + +#ifdef CONFIG_IP_MASQUERADE_ICMP + if (ciph->protocol == IPPROTO_ICMP) { + /* + * This section handles ICMP errors for ICMP packets + * + * First get a new ICMP header structure out of the IP packet + */ + struct icmphdr *cicmph = (struct icmphdr *)((char *)ciph + + (ciph->ihl<<2)); + + + IP_MASQ_DEBUG(2, "rv icmp/icmp rcv %lX->%lX id %d type %d\n", + ntohl(ciph->saddr), + ntohl(ciph->daddr), + ntohs(icmp_id(cicmph)), + cicmph->type); + + read_lock(&__ip_masq_lock); + ms = __ip_masq_in_get(ciph->protocol, + ciph->daddr, + icmp_hv_req(cicmph), + ciph->saddr, + icmp_id(cicmph)); + read_unlock(&__ip_masq_lock); + + if (ms == NULL) + return 0; + + if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { + __ip_masq_put(ms); + return -1; + } + ciph = (struct iphdr *) (icmph + 1); + cicmph = (struct icmphdr *)((char *)ciph + + (ciph->ihl<<2)); + /* Now we do real damage to this packet...! */ + /* First change the dest IP address, and recalc checksum */ + iph->daddr = ms->saddr; + ip_send_check(iph); + + /* Now change the *source* address in the contained IP */ + ciph->saddr = ms->saddr; + ip_send_check(ciph); + + /* Change the ID to the original one! 
*/ + (cicmph->un).echo.id = ms->sport; + __ip_masq_put(ms); + + /* And finally the ICMP checksum */ + icmph->checksum = 0; + icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); + + + IP_MASQ_DEBUG(2, "rv icmp/icmp rwt %lX->%lX id %d type %d\n", + ntohl(ciph->saddr), + ntohl(ciph->daddr), + ntohs(icmp_id(cicmph)), + cicmph->type); + + return 1; + } +#endif /* CONFIG_IP_MASQUERADE_ICMP */ + + /* We are only interested ICMPs generated from TCP or UDP packets */ + if ((ciph->protocol != IPPROTO_UDP) && + (ciph->protocol != IPPROTO_TCP)) + return 0; + + /* + * Find the ports involved - remember this packet was + * *outgoing* so the ports are reversed (and addresses) + */ + pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); + if (ntohs(pptr[0]) < PORT_MASQ_BEGIN || + ntohs(pptr[0]) > PORT_MASQ_END) + return 0; + + /* Ensure the checksum is correct */ + if (ip_compute_csum((unsigned char *) icmph, len)) + { + /* Failed checksum! */ + IP_MASQ_ERR( "reverse ICMP: failed checksum from %d.%d.%d.%d!\n", + NIPQUAD(iph->saddr)); + return(-1); + } + + + IP_MASQ_DEBUG(2, "Handling reverse ICMP for %08lX:%04X -> %08lX:%04X\n", + ntohl(ciph->saddr), ntohs(pptr[0]), + ntohl(ciph->daddr), ntohs(pptr[1])); + + + /* This is pretty much what __ip_masq_in_get_iph() does, except params are wrong way round */ + read_lock(&__ip_masq_lock); + ms = __ip_masq_in_get(ciph->protocol, + ciph->daddr, + pptr[1], + ciph->saddr, + pptr[0]); + read_unlock(&__ip_masq_lock); + + if (ms == NULL) + return 0; + + if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { + __ip_masq_put(ms); + return -1; + } + ciph = (struct iphdr *) (icmph + 1); + pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]); + + /* Now we do real damage to this packet...! */ + /* First change the dest IP address, and recalc checksum */ + iph->daddr = ms->saddr; + ip_send_check(iph); + + /* Now change the *source* address in the contained IP */ + ciph->saddr = ms->saddr; + ip_send_check(ciph); + + /* the TCP/UDP source port - cannot redo check */ + pptr[0] = ms->sport; + __ip_masq_put(ms); + + /* And finally the ICMP checksum */ + icmph->checksum = 0; + icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); + + + IP_MASQ_DEBUG(2, "Rewrote reverse ICMP to %08lX:%04X -> %08lX:%04X\n", + ntohl(ciph->saddr), ntohs(pptr[0]), + ntohl(ciph->daddr), ntohs(pptr[1])); + + + return 1; +} + + /* + * Check if it's an masqueraded port, look it up, + * and send it on its way... + * + * Better not have many hosts using the designated portrange + * as 'normal' ports, or you'll be spending many time in + * this function. + */ + +int ip_fw_demasquerade(struct sk_buff **skb_p) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + union ip_masq_tphdr h; + struct ip_masq *ms; + unsigned short size; + int doff = 0; + int csum = 0; + int csum_ok = 0; + __u32 maddr; + + /* + * Big tappo: only PACKET_HOST (nor loopback neither mcasts) + * ... don't know why 1st test DOES NOT include 2nd (?) + */ + + if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) { + IP_MASQ_DEBUG(2, "ip_fw_demasquerade(): packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", + skb->pkt_type, + iph->protocol, + NIPQUAD(iph->daddr)); + return 0; + } + + h.raw = (char*) iph + iph->ihl * 4; + + /* + * IP payload size + */ + size = ntohs(iph->tot_len) - (iph->ihl * 4); + + doff = proto_doff(iph->protocol, h.raw, size); + + switch (doff) { + case 0: + /* + * Input path: other IP protos Ok, will + * reach local sockets path. 
+ */ + return 0; + case -1: + IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n"); + return -1; + } + + maddr = iph->daddr; + switch (iph->protocol) { + case IPPROTO_ICMP: + return(ip_fw_demasq_icmp(skb_p)); + case IPPROTO_TCP: + case IPPROTO_UDP: + /* + * Make sure packet is in the masq range + * ... or some mod-ule relaxes input range + * ... or there is still some `special' mport opened + */ + if ((ntohs(h.portp[1]) < PORT_MASQ_BEGIN + || ntohs(h.portp[1]) > PORT_MASQ_END) +#ifdef CONFIG_IP_MASQUERADE_MOD + && (ip_masq_mod_in_rule(skb, iph) != 1) +#endif + && atomic_read(&mport_count) == 0 ) + return 0; + + /* Check that the checksum is OK */ + if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0)) + /* No UDP checksum */ + break; +#ifdef CONFIG_IP_MASQ_DEBUG + if (ip_masq_get_debug_level() > 3) { + skb->ip_summed = CHECKSUM_NONE; + } +#endif + + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + csum = csum_partial(h.raw + doff, size - doff, 0); + csum_ok++; + skb->csum = csum_partial(h.raw , doff, csum); + + case CHECKSUM_HW: + if (csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, skb->csum)) + { + IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + size); + return -1; + } + default: + /* CHECKSUM_UNNECESSARY */ + } + break; + default: + return 0; + } + + + + IP_MASQ_DEBUG(2, "Incoming %s %08lX:%04X -> %08lX:%04X\n", + masq_proto_name(iph->protocol), + ntohl(iph->saddr), ntohs(h.portp[0]), + ntohl(iph->daddr), ntohs(h.portp[1])); + + /* + * reroute to original host:port if found... + */ + + ms = ip_masq_in_get_iph(iph); + + /* + * Give additional modules a chance to create an entry + */ +#ifdef CONFIG_IP_MASQUERADE_MOD + if (!ms) + ms = ip_masq_mod_in_create(skb, iph, maddr); + + /* + * Call module's input update hook + */ + ip_masq_mod_in_update(skb, iph, ms); +#endif + + + if (ms != NULL) + { + + /* + * got reply, so clear flag + */ + ms->flags &= ~IP_MASQ_F_NO_REPLY; + + /* + * Set daddr,dport if not defined yet + * and tunnel is not setup as "dest loose" + */ + + if (ms->flags & IP_MASQ_F_DLOOSE) { + /* + * update dest loose values + */ + ms->dport = h.portp[0]; + ms->daddr = iph->saddr; + } else { + if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /* && ms->protocol == IPPROTO_TCP ) { */ + + write_lock(&__ip_masq_lock); + + ip_masq_unhash(ms); + ms->flags &= ~IP_MASQ_F_NO_DPORT; + ms->dport = h.portp[0]; + ip_masq_hash(ms); /* hash on new dport */ + + write_unlock(&__ip_masq_lock); + + IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled dport=%d\n", + ntohs(ms->dport)); + + } + if (ms->flags & IP_MASQ_F_NO_DADDR ) { /* && ms->protocol == IPPROTO_TCP) { */ + + write_lock(&__ip_masq_lock); + + ip_masq_unhash(ms); + ms->flags &= ~IP_MASQ_F_NO_DADDR; + ms->daddr = iph->saddr; + ip_masq_hash(ms); /* hash on new daddr */ + + write_unlock(&__ip_masq_lock); + + IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled daddr=%lX\n", + ntohl(ms->daddr)); + + } + } + if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) { + ip_masq_put(ms); + return -1; + } + iph->daddr = ms->saddr; + h.portp[1] = ms->sport; + + /* + * Invalidate csum saving if tunnel has masq helper + */ + + if (ms->app) + csum_ok = 0; + + /* + * Attempt ip_masq_app call. + * will fix ip_masq and iph ack_seq stuff + */ + + if (ip_masq_app_pkt_in(ms, skb_p, maddr) != 0) + { + /* + * skb has changed, update pointers. 
+ */ + + skb = *skb_p; + iph = skb->nh.iph; + h.raw = (char*) iph + iph->ihl*4; + size = ntohs(iph->tot_len) - (iph->ihl * 4); + } + + /* + * Yug! adjust UDP/TCP checksums + */ + + /* + * Transport's payload partial csum + */ + + if (!csum_ok) { + csum = csum_partial(h.raw + doff, size - doff, 0); + } + skb->csum = csum; + + /* + * Protocol csum + */ + switch (iph->protocol) { + case IPPROTO_TCP: + h.th->check = 0; + h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h.raw , doff, csum)); + break; + case IPPROTO_UDP: + h.uh->check = 0; + h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h.raw , doff, csum)); + if (h.uh->check == 0) + h.uh->check = 0xFFFF; + break; + } + ip_send_check(iph); + + IP_MASQ_DEBUG(2, "I-routed to %08lX:%04X\n",ntohl(iph->daddr),ntohs(h.portp[1])); + + masq_set_state (ms, 0, iph, h.portp); + ip_masq_put(ms); + + return 1; + } + + /* sorry, all this trouble for a no-hit :) */ + return 0; +} + + +void ip_masq_control_add(struct ip_masq *ms, struct ip_masq* ctl_ms) +{ + if (ms->control) { + IP_MASQ_ERR( "request control ADD for already controlled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", + NIPQUAD(ms->saddr),ntohs(ms->sport), + NIPQUAD(ms->daddr),ntohs(ms->dport)); + ip_masq_control_del(ms); + } + IP_MASQ_DEBUG(1, "ADDing control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n", + NIPQUAD(ms->daddr),ntohs(ms->dport), + NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport)); + ms->control = ctl_ms; + atomic_inc(&ctl_ms->n_control); +} + +void ip_masq_control_del(struct ip_masq *ms) +{ + struct ip_masq *ctl_ms = ms->control; + if (!ctl_ms) { + IP_MASQ_ERR( "request control DEL for uncontrolled: %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", + NIPQUAD(ms->saddr),ntohs(ms->sport), + NIPQUAD(ms->daddr),ntohs(ms->dport)); + return; + } + IP_MASQ_DEBUG(1, "DELeting control for: ms.dst=%d.%d.%d.%d:%d ctl_ms.dst=%d.%d.%d.%d:%d\n", + NIPQUAD(ms->daddr),ntohs(ms->dport), + NIPQUAD(ctl_ms->daddr),ntohs(ctl_ms->dport)); + ms->control = NULL; + if (atomic_read(&ctl_ms->n_control) == 0) { + IP_MASQ_ERR( "BUG control DEL with n=0 : %d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", + NIPQUAD(ms->saddr),ntohs(ms->sport), + NIPQUAD(ms->daddr),ntohs(ms->dport)); + return; + + } + atomic_dec(&ctl_ms->n_control); +} + +struct ip_masq * ip_masq_control_get(struct ip_masq *ms) +{ + return ms->control; +} + + +#ifdef CONFIG_PROC_FS +/* + * /proc/net entries + * From userspace + */ +static int ip_msqhst_procinfo(char *buffer, char **start, off_t offset, + int length, int unused) +{ + off_t pos=0, begin; + struct ip_masq *ms; + char temp[129]; + int idx = 0; + int len=0; + struct list_head *l,*e; + + if (offset < 128) + { + sprintf(temp, + "Prc FromIP FPrt ToIP TPrt Masq Init-seq Delta PDelta Expires (free=%d,%d,%d)", + atomic_read(ip_masq_free_ports), + atomic_read(ip_masq_free_ports+1), + atomic_read(ip_masq_free_ports+2)); + len = sprintf(buffer, "%-127s\n", temp); + } + pos = 128; + + for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++) + { + /* + * Lock is actually only need in next loop + * we are called from uspace: must stop bh. 
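+	 *
+	 *	(read_lock_bh is what "stops bh" here: masq_expire() runs from
+	 *	the timer bottom half and takes this lock for writing, so a
+	 *	plain read_lock from process context could deadlock.)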
+ */ + read_lock_bh(&__ip_masq_lock); + + l = &ip_masq_m_table[idx]; + for (e=l->next; e!=l; e=e->next) { + ms = list_entry(e, struct ip_masq, m_list); + pos += 128; + if (pos <= offset) { + len = 0; + continue; + } + + /* + * We have locked the tables, no need to del/add timers + * nor cli() 8) + */ + + sprintf(temp,"%s %08lX:%04X %08lX:%04X %04X %08X %6d %6d %7lu", + masq_proto_name(ms->protocol), + ntohl(ms->saddr), ntohs(ms->sport), + ntohl(ms->daddr), ntohs(ms->dport), + ntohs(ms->mport), + ms->out_seq.init_seq, + ms->out_seq.delta, + ms->out_seq.previous_delta, + ms->timer.expires-jiffies); + len += sprintf(buffer+len, "%-127s\n", temp); + + if(len >= length) { + + read_unlock_bh(&__ip_masq_lock); + goto done; + } + } + read_unlock_bh(&__ip_masq_lock); + + } +done: + + + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + if(len>length) + len = length; + return len; +} + +#endif + +/* + * Timeouts handling by ipfwadm/ipchains + * From ip_fw.c + */ + +int ip_fw_masq_timeouts(void *m, int len) +{ + struct ip_fw_masq *masq; + int ret = EINVAL; + + if (len != sizeof(struct ip_fw_masq)) { + IP_MASQ_DEBUG(1, "ip_fw_masq_timeouts: length %d, expected %d\n", + len, sizeof(struct ip_fw_masq)); + } else { + masq = (struct ip_fw_masq *)m; + if (masq->tcp_timeout) + masq_timeout_table.timeout[IP_MASQ_S_ESTABLISHED] + = masq->tcp_timeout; + + if (masq->tcp_fin_timeout) + masq_timeout_table.timeout[IP_MASQ_S_FIN_WAIT] + = masq->tcp_fin_timeout; + + if (masq->udp_timeout) + masq_timeout_table.timeout[IP_MASQ_S_UDP] + = masq->udp_timeout; + ret = 0; + } + return ret; +} +/* + * Module autoloading stuff + */ + +static int ip_masq_user_check_hook(void) { +#ifdef CONFIG_KMOD + if (ip_masq_user_hook == NULL) { + IP_MASQ_DEBUG(1, "About to request \"ip_masq_user\" module\n"); + request_module("ip_masq_user"); + } +#endif /* CONFIG_KMOD */ + return (ip_masq_user_hook != NULL); +} + +/* + * user module hook- info + */ +static int ip_masq_user_info(char *buffer, char **start, off_t offset, + int len, int *eof, void *data) +{ + int ret = -ENOPKG; + if (ip_masq_user_check_hook()) { + ret = ip_masq_user_hook->info(buffer, start, offset, len, (int) data); + } + return ret; +} + +/* + * user module hook- entry mgmt + */ +static int ip_masq_user_ctl(int optname, void *arg, int arglen) +{ + int ret = -ENOPKG; + if (ip_masq_user_check_hook()) { + ret = ip_masq_user_hook->ctl(optname, arg, arglen); + } + return ret; +} + +/* + * Control from ip_sockglue + * MAIN ENTRY point from userspace (apart from /proc *info entries) + * Returns errno + */ +int ip_masq_uctl(int optname, char * optval , int optlen) +{ + struct ip_masq_ctl masq_ctl; + int ret = -EINVAL; + + if(optlen>sizeof(masq_ctl)) + return -EINVAL; + + if(copy_from_user(&masq_ctl,optval,optlen)) + return -EFAULT; + + IP_MASQ_DEBUG(1,"ip_masq_ctl(optname=%d, optlen=%d, target=%d, cmd=%d)\n", + optname, optlen, masq_ctl.m_target, masq_ctl.m_cmd); + + switch (masq_ctl.m_target) { + case IP_MASQ_TARGET_USER: + ret = ip_masq_user_ctl(optname, &masq_ctl, optlen); + break; +#ifdef CONFIG_IP_MASQUERADE_MOD + case IP_MASQ_TARGET_MOD: + ret = ip_masq_mod_ctl(optname, &masq_ctl, optlen); + break; +#endif + } + + /* + * If ret>0, copy to user space + */ + + if (ret > 0 && ret <= sizeof (masq_ctl)) { + if (copy_to_user(optval, &masq_ctl, ret) ) + return -EFAULT; + ret = 0; + } + + return ret; +} + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_net_ip_masq = NULL; + +#ifdef MODULE +static void ip_masq_proc_count(struct inode *inode, int 
fill) +{ + if (fill) + MOD_INC_USE_COUNT; + else + MOD_DEC_USE_COUNT; +} +#endif + +int ip_masq_proc_register(struct proc_dir_entry *ent) +{ + if (!proc_net_ip_masq) return -1; + IP_MASQ_DEBUG(1, "registering \"/proc/net/ip_masq/%s\" entry\n", + ent->name); + return proc_register(proc_net_ip_masq, ent); +} +void ip_masq_proc_unregister(struct proc_dir_entry *ent) +{ + if (!proc_net_ip_masq) return; + IP_MASQ_DEBUG(1, "unregistering \"/proc/net/ip_masq/%s\" entry\n", + ent->name); + proc_unregister(proc_net_ip_masq, ent->low_ino); +} + + +__initfunc(static void masq_proc_init(void)) +{ + IP_MASQ_DEBUG(1,"registering /proc/net/ip_masq\n"); + if (!proc_net_ip_masq) { + struct proc_dir_entry *ent; + ent = create_proc_entry("net/ip_masq", S_IFDIR, 0); + if (ent) { +#ifdef MODULE + ent->fill_inode = ip_masq_proc_count; +#endif + proc_net_ip_masq = ent; + } else { + IP_MASQ_ERR("Could not create \"/proc/net/ip_masq\" entry\n"); + } + } +} +#endif /* CONFIG_PROC_FS */ +/* + * Wrapper over inet_select_addr() + */ +u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope) +{ + return inet_select_addr(dev, dst, scope); +} + +/* + * Initialize ip masquerading + */ +__initfunc(int ip_masq_init(void)) +{ + int idx; + for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_masq_s_table[idx]); + INIT_LIST_HEAD(&ip_masq_m_table[idx]); + INIT_LIST_HEAD(&ip_masq_d_table[idx]); + } +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_IPMSQHST, 13, "ip_masquerade", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ip_msqhst_procinfo + }); + masq_proc_init(); + + ip_masq_proc_register(&(struct proc_dir_entry) { + 0, 3, "tcp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + NULL, /* get_info */ + NULL, /* fill_inode */ + NULL, NULL, NULL, + (char *) IPPROTO_TCP, + ip_masq_user_info + }); + ip_masq_proc_register(&(struct proc_dir_entry) { + 0, 3, "udp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + NULL, /* get_info */ + NULL, /* fill_inode */ + NULL, NULL, NULL, + (char *) IPPROTO_UDP, + ip_masq_user_info + }); + ip_masq_proc_register(&(struct proc_dir_entry) { + 0, 4, "icmp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + NULL, /* get_info */ + NULL, /* fill_inode */ + NULL, NULL, NULL, + (char *) IPPROTO_ICMP, + ip_masq_user_info + }); +#endif +#ifdef CONFIG_IP_MASQUERADE_IPAUTOFW + ip_autofw_init(); +#endif +#ifdef CONFIG_IP_MASQUERADE_IPPORTFW + ip_portfw_init(); +#endif +#ifdef CONFIG_IP_MASQUERADE_MFW + ip_mfw_init(); +#endif + ip_masq_app_init(); + + return 0; +} diff --git a/pfinet/linux-src/net/ipv4/ip_masq_app.c b/pfinet/linux-src/net/ipv4/ip_masq_app.c new file mode 100644 index 00000000..84e059fa --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_app.c @@ -0,0 +1,603 @@ +/* + * IP_MASQ_APP application masquerading module + * + * + * $Id: ip_masq_app.c,v 1.16 1998/08/29 23:51:14 davem Exp $ + * + * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * JJC : Implemented also input pkt hook + * Miquel van Smoorenburg : Copy more stuff when resizing skb + * + * + * FIXME: + * - ip_masq_skb_replace(): use same skb if space available. 
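+ *	  (one possible, untested approach: when n_len <= o_len the
+ *	  new bytes could be copied over the old ones, the packet tail
+ *	  moved down with memmove() and the skb shortened with
+ *	  skb_trim(), avoiding the alloc_skb()/copy path used now)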
+ * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <asm/system.h> +#include <linux/stat.h> +#include <linux/proc_fs.h> +#include <net/ip_masq.h> + +#define IP_MASQ_APP_TAB_SIZE 16 /* must be power of 2 */ + +#define IP_MASQ_APP_HASH(proto, port) ((port^proto) & (IP_MASQ_APP_TAB_SIZE-1)) +#define IP_MASQ_APP_TYPE(proto, port) ( proto<<16 | port ) +#define IP_MASQ_APP_PORT(type) ( type & 0xffff ) +#define IP_MASQ_APP_PROTO(type) ( (type>>16) & 0x00ff ) + + +EXPORT_SYMBOL(register_ip_masq_app); +EXPORT_SYMBOL(unregister_ip_masq_app); +EXPORT_SYMBOL(ip_masq_skb_replace); + +/* + * will hold masq app. hashed list heads + */ + +struct ip_masq_app *ip_masq_app_base[IP_MASQ_APP_TAB_SIZE]; + +/* + * ip_masq_app registration routine + * port: host byte order. + */ + +int register_ip_masq_app(struct ip_masq_app *mapp, unsigned short proto, __u16 port) +{ + unsigned long flags; + unsigned hash; + if (!mapp) { + IP_MASQ_ERR("register_ip_masq_app(): NULL arg\n"); + return -EINVAL; + } + mapp->type = IP_MASQ_APP_TYPE(proto, port); + mapp->n_attach = 0; + hash = IP_MASQ_APP_HASH(proto, port); + + save_flags(flags); + cli(); + mapp->next = ip_masq_app_base[hash]; + ip_masq_app_base[hash] = mapp; + restore_flags(flags); + + return 0; +} + +/* + * ip_masq_app unreg. routine. + */ + +int unregister_ip_masq_app(struct ip_masq_app *mapp) +{ + struct ip_masq_app **mapp_p; + unsigned hash; + unsigned long flags; + if (!mapp) { + IP_MASQ_ERR("unregister_ip_masq_app(): NULL arg\n"); + return -EINVAL; + } + /* + * only allow unregistration if it has no attachments + */ + if (mapp->n_attach) { + IP_MASQ_ERR("unregister_ip_masq_app(): has %d attachments. failed\n", + mapp->n_attach); + return -EINVAL; + } + hash = IP_MASQ_APP_HASH(IP_MASQ_APP_PROTO(mapp->type), IP_MASQ_APP_PORT(mapp->type)); + + save_flags(flags); + cli(); + for (mapp_p = &ip_masq_app_base[hash]; *mapp_p ; mapp_p = &(*mapp_p)->next) + if (mapp == (*mapp_p)) { + *mapp_p = mapp->next; + restore_flags(flags); + return 0; + } + + restore_flags(flags); + IP_MASQ_ERR("unregister_ip_masq_app(proto=%s,port=%u): not hashed!\n", + masq_proto_name(IP_MASQ_APP_PROTO(mapp->type)), IP_MASQ_APP_PORT(mapp->type)); + return -EINVAL; +} + +/* + * get ip_masq_app object by its proto and port (net byte order). + */ + +struct ip_masq_app * ip_masq_app_get(unsigned short proto, __u16 port) +{ + struct ip_masq_app *mapp; + unsigned hash; + unsigned type; + + port = ntohs(port); + type = IP_MASQ_APP_TYPE(proto,port); + hash = IP_MASQ_APP_HASH(proto,port); + for(mapp = ip_masq_app_base[hash]; mapp ; mapp = mapp->next) { + if (type == mapp->type) return mapp; + } + return NULL; +} + +/* + * ip_masq_app object binding related funcs. 
+ */ + +/* + * change ip_masq_app object's number of bindings + */ + +static __inline__ int ip_masq_app_bind_chg(struct ip_masq_app *mapp, int delta) +{ + unsigned long flags; + int n_at; + if (!mapp) return -1; + save_flags(flags); + cli(); + n_at = mapp->n_attach + delta; + if (n_at < 0) { + restore_flags(flags); + IP_MASQ_ERR("ip_masq_app: tried to set n_attach < 0 for (proto=%s,port==%d) ip_masq_app object.\n", + masq_proto_name(IP_MASQ_APP_PROTO(mapp->type)), + IP_MASQ_APP_PORT(mapp->type)); + return -1; + } + mapp->n_attach = n_at; + restore_flags(flags); + return 0; +} + +/* + * Bind ip_masq to its ip_masq_app based on proto and dport ALREADY + * set in ip_masq struct. Also calls constructor. + */ + +struct ip_masq_app * ip_masq_bind_app(struct ip_masq *ms) +{ + struct ip_masq_app * mapp; + + if (ms->protocol != IPPROTO_TCP && ms->protocol != IPPROTO_UDP) + return NULL; + + mapp = ip_masq_app_get(ms->protocol, ms->dport); + +#if 0000 +/* #ifdef CONFIG_IP_MASQUERADE_IPAUTOFW */ + if (mapp == NULL) + mapp = ip_masq_app_get(ms->protocol, ms->sport); +/* #endif */ +#endif + + if (mapp != NULL) { + /* + * don't allow binding if already bound + */ + + if (ms->app != NULL) { + IP_MASQ_ERR("ip_masq_bind_app() called for already bound object.\n"); + return ms->app; + } + + ms->app = mapp; + if (mapp->masq_init_1) mapp->masq_init_1(mapp, ms); + ip_masq_app_bind_chg(mapp, +1); + } + return mapp; +} + +/* + * Unbind ms from type object and call ms destructor (does not kfree()). + */ + +int ip_masq_unbind_app(struct ip_masq *ms) +{ + struct ip_masq_app * mapp; + mapp = ms->app; + + if (ms->protocol != IPPROTO_TCP && ms->protocol != IPPROTO_UDP) + return 0; + + if (mapp != NULL) { + if (mapp->masq_done_1) mapp->masq_done_1(mapp, ms); + ms->app = NULL; + ip_masq_app_bind_chg(mapp, -1); + } + return (mapp != NULL); +} + +/* + * Fixes th->seq based on ip_masq_seq info. + */ + +static __inline__ void masq_fix_seq(const struct ip_masq_seq *ms_seq, struct tcphdr *th) +{ + __u32 seq; + + seq = ntohl(th->seq); + + /* + * Adjust seq with delta-offset for all packets after + * the most recent resized pkt seq and with previous_delta offset + * for all packets before most recent resized pkt seq. + */ + + if (ms_seq->delta || ms_seq->previous_delta) { + if(after(seq,ms_seq->init_seq) ) { + th->seq = htonl(seq + ms_seq->delta); + IP_MASQ_DEBUG(1, "masq_fix_seq() : added delta (%d) to seq\n",ms_seq->delta); + } else { + th->seq = htonl(seq + ms_seq->previous_delta); + IP_MASQ_DEBUG(1, "masq_fix_seq() : added previous_delta (%d) to seq\n",ms_seq->previous_delta); + } + } + + +} + +/* + * Fixes th->ack_seq based on ip_masq_seq info. + */ + +static __inline__ void masq_fix_ack_seq(const struct ip_masq_seq *ms_seq, struct tcphdr *th) +{ + __u32 ack_seq; + + ack_seq=ntohl(th->ack_seq); + + /* + * Adjust ack_seq with delta-offset for + * the packets AFTER most recent resized pkt has caused a shift + * for packets before most recent resized pkt, use previous_delta + */ + + if (ms_seq->delta || ms_seq->previous_delta) { + if(after(ack_seq,ms_seq->init_seq)) { + th->ack_seq = htonl(ack_seq-ms_seq->delta); + IP_MASQ_DEBUG(1, "masq_fix_ack_seq() : subtracted delta (%d) from ack_seq\n",ms_seq->delta); + + } else { + th->ack_seq = htonl(ack_seq-ms_seq->previous_delta); + IP_MASQ_DEBUG(1, "masq_fix_ack_seq() : subtracted previous_delta (%d) from ack_seq\n",ms_seq->previous_delta); + } + } + +} + +/* + * Updates ip_masq_seq if pkt has been resized + * Assumes already checked proto==IPPROTO_TCP and diff!=0. 
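+ *
+ *	For example: a PORT/DCC rewrite grows an outgoing segment that
+ *	starts at sequence S by 7 bytes.  previous_delta takes the old
+ *	delta, delta grows by 7 and init_seq becomes S; from then on
+ *	masq_fix_seq() adds delta to output packets past S (and
+ *	previous_delta to older ones), while masq_fix_ack_seq()
+ *	subtracts the same amounts from the peer's acknowledgements.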
+ */ + +static __inline__ void masq_seq_update(struct ip_masq *ms, struct ip_masq_seq *ms_seq, unsigned mflag, __u32 seq, int diff) +{ + /* if (diff == 0) return; */ + + if ( !(ms->flags & mflag) || after(seq, ms_seq->init_seq)) + { + ms_seq->previous_delta=ms_seq->delta; + ms_seq->delta+=diff; + ms_seq->init_seq=seq; + ms->flags |= mflag; + } +} + +/* + * Output pkt hook. Will call bound ip_masq_app specific function + * called by ip_fw_masquerade(), assumes previously checked ms!=NULL + * returns (new - old) skb->len diff. + */ + +int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct ip_masq_app * mapp; + struct iphdr *iph; + struct tcphdr *th; + int diff; + __u32 seq; + + /* + * check if application masquerading is bound to + * this ip_masq. + * assumes that once an ip_masq is bound, + * it will not be unbound during its life. + */ + + if ( (mapp = ms->app) == NULL) + return 0; + + iph = (*skb_p)->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + + /* + * Remember seq number in case this pkt gets resized + */ + + seq = ntohl(th->seq); + + /* + * Fix seq stuff if flagged as so. + */ + + if (ms->protocol == IPPROTO_TCP) { + if (ms->flags & IP_MASQ_F_OUT_SEQ) + masq_fix_seq(&ms->out_seq, th); + if (ms->flags & IP_MASQ_F_IN_SEQ) + masq_fix_ack_seq(&ms->in_seq, th); + } + + /* + * Call private output hook function + */ + + if ( mapp->pkt_out == NULL ) + return 0; + + diff = mapp->pkt_out(mapp, ms, skb_p, maddr); + + /* + * Update ip_masq seq stuff if len has changed. + */ + + if (diff != 0 && ms->protocol == IPPROTO_TCP) + masq_seq_update(ms, &ms->out_seq, IP_MASQ_F_OUT_SEQ, seq, diff); + + return diff; +} + +/* + * Input pkt hook. Will call bound ip_masq_app specific function + * called by ip_fw_demasquerade(), assumes previously checked ms!=NULL. + * returns (new - old) skb->len diff. + */ + +int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct ip_masq_app * mapp; + struct iphdr *iph; + struct tcphdr *th; + int diff; + __u32 seq; + + /* + * check if application masquerading is bound to + * this ip_masq. + * assumes that once an ip_masq is bound, + * it will not be unbound during its life. + */ + + if ( (mapp = ms->app) == NULL) + return 0; + + iph = (*skb_p)->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + + /* + * Remember seq number in case this pkt gets resized + */ + + seq = ntohl(th->seq); + + /* + * Fix seq stuff if flagged as so. + */ + + if (ms->protocol == IPPROTO_TCP) { + if (ms->flags & IP_MASQ_F_IN_SEQ) + masq_fix_seq(&ms->in_seq, th); + if (ms->flags & IP_MASQ_F_OUT_SEQ) + masq_fix_ack_seq(&ms->out_seq, th); + } + + /* + * Call private input hook function + */ + + if ( mapp->pkt_in == NULL ) + return 0; + + diff = mapp->pkt_in(mapp, ms, skb_p, maddr); + + /* + * Update ip_masq seq stuff if len has changed. 
+ */ + + if (diff != 0 && ms->protocol == IPPROTO_TCP) + masq_seq_update(ms, &ms->in_seq, IP_MASQ_F_IN_SEQ, seq, diff); + + return diff; +} + +/* + * /proc/ip_masq_app entry function + */ + +int ip_masq_app_getinfo(char *buffer, char **start, off_t offset, int length, int dummy) +{ + off_t pos=0, begin=0; + int len=0; + struct ip_masq_app * mapp; + unsigned idx; + + if (offset < 40) + len=sprintf(buffer,"%-39s\n", "prot port n_attach name"); + pos = 40; + + for (idx=0 ; idx < IP_MASQ_APP_TAB_SIZE; idx++) + for (mapp = ip_masq_app_base[idx]; mapp ; mapp = mapp->next) { + /* + * If you change the length of this sprintf, then all + * the length calculations need fixing too! + * Line length = 40 (3 + 2 + 7 + 1 + 7 + 1 + 2 + 17) + */ + pos += 40; + if (pos < offset) + continue; + + len += sprintf(buffer+len, "%-3s %-7u %-7d %-17s\n", + masq_proto_name(IP_MASQ_APP_PROTO(mapp->type)), + IP_MASQ_APP_PORT(mapp->type), mapp->n_attach, + mapp->name); + + if(len >= length) + goto done; + } +done: + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + if (len > length) + len = length; + return len; +} + + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_ip_masq_app = { + PROC_NET_IP_MASQ_APP, 3, "app", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ip_masq_app_getinfo +}; +#endif + +/* + * Initialization routine + */ + +__initfunc(int ip_masq_app_init(void)) +{ +#ifdef CONFIG_PROC_FS + ip_masq_proc_register(&proc_net_ip_masq_app); +#endif + return 0; +} + +/* + * Replace a segment (of skb->data) with a new one. + * FIXME: Should re-use same skb if space available, this could + * be done if n_len < o_len, unless some extra space + * were already allocated at driver level :P . + */ + +static struct sk_buff * skb_replace(struct sk_buff *skb, int pri, char *o_buf, int o_len, char *n_buf, int n_len) +{ + int maxsize, diff, o_offset; + struct sk_buff *n_skb; + int offset; + + maxsize = skb->truesize; + + diff = n_len - o_len; + o_offset = o_buf - (char*) skb->data; + + if (maxsize <= n_len) { + if (diff != 0) { + memcpy(skb->data + o_offset + n_len,o_buf + o_len, + skb->len - (o_offset + o_len)); + } + + memcpy(skb->data + o_offset, n_buf, n_len); + + n_skb = skb; + skb->len = n_len; + skb->end = skb->head+n_len; + } else { + /* + * Sizes differ, make a copy. + * + * FIXME: move this to core/sbuff.c:skb_grow() + */ + + n_skb = alloc_skb(MAX_HEADER + skb->len + diff, pri); + if (n_skb == NULL) { + IP_MASQ_ERR("skb_replace(): no room left (from %p)\n", + __builtin_return_address(0)); + return skb; + + } + skb_reserve(n_skb, MAX_HEADER); + skb_put(n_skb, skb->len + diff); + + /* + * Copy as much data from the old skb as possible. Even + * though we're only forwarding packets, we need stuff + * like skb->protocol (PPP driver wants it). 
+ */ + offset = n_skb->data - skb->data; + n_skb->nh.raw = skb->nh.raw + offset; + n_skb->h.raw = skb->h.raw + offset; + n_skb->dev = skb->dev; + n_skb->mac.raw = skb->mac.raw + offset; + n_skb->pkt_type = skb->pkt_type; + n_skb->protocol = skb->protocol; + n_skb->ip_summed = skb->ip_summed; + n_skb->dst = dst_clone(skb->dst); + + /* + * Copy pkt in new buffer + */ + + memcpy(n_skb->data, skb->data, o_offset); + memcpy(n_skb->data + o_offset, n_buf, n_len); + memcpy(n_skb->data + o_offset + n_len, o_buf + o_len, + skb->len - (o_offset + o_len) ); + + /* + * Problem, how to replace the new skb with old one, + * preferably inplace + */ + + kfree_skb(skb); + } + return n_skb; +} + +/* + * calls skb_replace() and update ip header if new skb was allocated + */ + +struct sk_buff * ip_masq_skb_replace(struct sk_buff *skb, int pri, char *o_buf, int o_len, char *n_buf, int n_len) +{ + int diff; + struct sk_buff *n_skb; + unsigned skb_len; + + diff = n_len - o_len; + n_skb = skb_replace(skb, pri, o_buf, o_len, n_buf, n_len); + skb_len = skb->len; + + if (diff) + { + struct iphdr *iph; + IP_MASQ_DEBUG(1, "masq_skb_replace(): pkt resized for %d bytes (len=%d)\n", diff, skb->len); + /* + * update ip header + */ + iph = n_skb->nh.iph; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + iph->tot_len = htons(skb_len + diff); + } + return n_skb; +} diff --git a/pfinet/linux-src/net/ipv4/ip_masq_autofw.c b/pfinet/linux-src/net/ipv4/ip_masq_autofw.c new file mode 100644 index 00000000..d2a1729c --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_autofw.c @@ -0,0 +1,448 @@ +/* + * IP_MASQ_AUTOFW auto forwarding module + * + * + * $Id: ip_masq_autofw.c,v 1.3 1998/08/29 23:51:10 davem Exp $ + * + * Author: Richard Lynch + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Fixes: + * Juan Jose Ciarlante : created this new file from ip_masq.c and ip_fw.c + * Juan Jose Ciarlante : modularized + * Juan Jose Ciarlante : use GFP_KERNEL when creating entries + * Juan Jose Ciarlante : call del_timer() when freeing entries (!) 
+ * FIXME: + * - implement refcnt + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <asm/system.h> +#include <linux/stat.h> +#include <linux/proc_fs.h> +#include <linux/if.h> +#include <linux/init.h> +#include <linux/ip_fw.h> +#include <net/ip_masq.h> +#include <net/ip_masq_mod.h> +#include <linux/ip_masq.h> + +#define IP_AUTOFW_EXPIRE 15*HZ + +/* WARNING: bitwise equal to ip_autofw_user in linux/ip_masq.h */ +struct ip_autofw { + struct ip_autofw * next; + __u16 type; + __u16 low; + __u16 hidden; + __u16 high; + __u16 visible; + __u16 protocol; + __u32 lastcontact; + __u32 where; + __u16 ctlproto; + __u16 ctlport; + __u16 flags; + struct timer_list timer; +}; + +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +/* + * Auto-forwarding table + */ + +static struct ip_autofw * ip_autofw_hosts = NULL; +static struct ip_masq_mod * mmod_self = NULL; + +/* + * Check if a masq entry should be created for a packet + */ + +static __inline__ struct ip_autofw * ip_autofw_check_range (__u32 where, __u16 port, __u16 protocol, int reqact) +{ + struct ip_autofw *af; + af=ip_autofw_hosts; + port=ntohs(port); + while (af) { + if (af->type==IP_FWD_RANGE && + port>=af->low && + port<=af->high && + protocol==af->protocol && + + /* + * It's ok to create masq entries after + * the timeout if we're in insecure mode + */ + (af->flags & IP_AUTOFW_ACTIVE || !reqact || !(af->flags & IP_AUTOFW_SECURE)) && + (!(af->flags & IP_AUTOFW_SECURE) || af->lastcontact==where || !reqact)) + return(af); + af=af->next; + } + return(NULL); +} + +static __inline__ struct ip_autofw * ip_autofw_check_port (__u16 port, __u16 protocol) +{ + struct ip_autofw *af; + af=ip_autofw_hosts; + port=ntohs(port); + while (af) + { + if (af->type==IP_FWD_PORT && port==af->visible && protocol==af->protocol) + return(af); + af=af->next; + } + return(NULL); +} + +static __inline__ struct ip_autofw * ip_autofw_check_direct (__u16 port, __u16 protocol) +{ + struct ip_autofw *af; + af=ip_autofw_hosts; + port=ntohs(port); + while (af) + { + if (af->type==IP_FWD_DIRECT && af->low<=port && af->high>=port) + return(af); + af=af->next; + } + return(NULL); +} + +static __inline__ void ip_autofw_update_out (__u32 who, __u32 where, __u16 port, __u16 protocol) +{ + struct ip_autofw *af; + af=ip_autofw_hosts; + port=ntohs(port); + while (af) + { + if (af->type==IP_FWD_RANGE && af->ctlport==port && af->ctlproto==protocol) + { + if (af->flags & IP_AUTOFW_USETIME) + { + mod_timer(&af->timer, + jiffies+IP_AUTOFW_EXPIRE); + } + af->flags|=IP_AUTOFW_ACTIVE; + af->lastcontact=where; + af->where=who; + } + af=af->next; + } +} + +#if 0 +static __inline__ void ip_autofw_update_in (__u32 where, __u16 port, __u16 protocol) +{ + struct ip_autofw *af; + af=ip_autofw_check_range(where, port,protocol); + if (af) + { + mod_timer(&af->timer, jiffies+IP_AUTOFW_EXPIRE); + } +} +#endif + + +static __inline__ void ip_autofw_expire(unsigned long data) +{ + struct ip_autofw * af; + af=(struct ip_autofw *) data; + af->flags &= ~IP_AUTOFW_ACTIVE; + af->timer.expires=0; + af->lastcontact=0; + if (af->flags & IP_AUTOFW_SECURE) + af->where=0; +} + + + +static __inline__ int ip_autofw_add(struct ip_autofw_user * af) +{ + struct ip_autofw * newaf; + newaf = kmalloc( sizeof(struct ip_autofw), GFP_KERNEL ); + init_timer(&newaf->timer); + if ( newaf == NULL ) + { + printk("ip_autofw_add: malloc said no\n"); + 
return( ENOMEM ); + } + + MOD_INC_USE_COUNT; + + memcpy(newaf, af, sizeof(struct ip_autofw_user)); + newaf->timer.data = (unsigned long) newaf; + newaf->timer.function = ip_autofw_expire; + newaf->timer.expires = 0; + newaf->lastcontact=0; + newaf->next=ip_autofw_hosts; + ip_autofw_hosts=newaf; + ip_masq_mod_inc_nent(mmod_self); + return(0); +} + +static __inline__ int ip_autofw_del(struct ip_autofw_user * af) +{ + struct ip_autofw ** af_p, *curr; + + for (af_p=&ip_autofw_hosts, curr=*af_p; (curr=*af_p); af_p = &(*af_p)->next) { + if (af->type == curr->type && + af->low == curr->low && + af->high == curr->high && + af->hidden == curr->hidden && + af->visible == curr->visible && + af->protocol == curr->protocol && + af->where == curr->where && + af->ctlproto == curr->ctlproto && + af->ctlport == curr->ctlport) + { + ip_masq_mod_dec_nent(mmod_self); + *af_p = curr->next; + if (af->flags&IP_AUTOFW_ACTIVE) + del_timer(&curr->timer); + kfree_s(curr,sizeof(struct ip_autofw)); + MOD_DEC_USE_COUNT; + return 0; + } + curr=curr->next; + } + return EINVAL; +} + +static __inline__ int ip_autofw_flush(void) +{ + struct ip_autofw * af; + + while (ip_autofw_hosts) + { + af=ip_autofw_hosts; + ip_masq_mod_dec_nent(mmod_self); + ip_autofw_hosts=ip_autofw_hosts->next; + if (af->flags&IP_AUTOFW_ACTIVE) + del_timer(&af->timer); + kfree_s(af,sizeof(struct ip_autofw)); + MOD_DEC_USE_COUNT; + } + return(0); +} + +/* + * Methods for registered object + */ + +static int autofw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen) +{ + struct ip_autofw_user *af = &mctl->u.autofw_user; + + switch (mctl->m_cmd) { + case IP_MASQ_CMD_ADD: + case IP_MASQ_CMD_INSERT: + if (optlen<sizeof(*af)) + return EINVAL; + return ip_autofw_add(af); + case IP_MASQ_CMD_DEL: + if (optlen<sizeof(*af)) + return EINVAL; + return ip_autofw_del(af); + case IP_MASQ_CMD_FLUSH: + return ip_autofw_flush(); + + } + return EINVAL; +} + + +static int autofw_out_update(const struct sk_buff *skb, const struct iphdr *iph, struct ip_masq *ms) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + /* + * Update any ipautofw entries ... 
+ */ + + ip_autofw_update_out(iph->saddr, iph->daddr, portp[1], iph->protocol); + return IP_MASQ_MOD_NOP; +} + +static struct ip_masq * autofw_out_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + /* + * If the source port is supposed to match the masq port, then + * make it so + */ + + if (ip_autofw_check_direct(portp[1],iph->protocol)) { + return ip_masq_new(iph->protocol, + maddr, portp[0], + iph->saddr, portp[0], + iph->daddr, portp[1], + 0); + } + return NULL; +} + +#if 0 +static int autofw_in_update(const struct sk_buff *skb, const struct iphdr *iph, __u16 *portp, struct ip_masq *ms) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + ip_autofw_update_in(iph->saddr, portp[1], iph->protocol); + return IP_MASQ_MOD_NOP; +} +#endif + +static int autofw_in_rule(const struct sk_buff *skb, const struct iphdr *iph) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + return (ip_autofw_check_range(iph->saddr, portp[1], iph->protocol, 0) + || ip_autofw_check_direct(portp[1], iph->protocol) + || ip_autofw_check_port(portp[1], iph->protocol)); +} + +static struct ip_masq * autofw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + struct ip_autofw *af; + + if ((af=ip_autofw_check_range(iph->saddr, portp[1], iph->protocol, 0))) { + IP_MASQ_DEBUG(1-debug, "autofw_check_range HIT\n"); + return ip_masq_new(iph->protocol, + maddr, portp[1], + af->where, portp[1], + iph->saddr, portp[0], + 0); + } + if ((af=ip_autofw_check_port(portp[1], iph->protocol)) ) { + IP_MASQ_DEBUG(1-debug, "autofw_check_port HIT\n"); + return ip_masq_new(iph->protocol, + maddr, htons(af->visible), + af->where, htons(af->hidden), + iph->saddr, portp[0], + 0); + } + return NULL; +} + +#ifdef CONFIG_PROC_FS +static int autofw_procinfo(char *buffer, char **start, off_t offset, + int length, int unused) +{ + off_t pos=0, begin=0; + struct ip_autofw * af; + int len=0; + + len=sprintf(buffer,"Type Prot Low High Vis Hid Where Last CPto CPrt Timer Flags\n"); + + for(af = ip_autofw_hosts; af ; af = af->next) + { + len+=sprintf(buffer+len,"%4X %4X %04X-%04X/%04X %04X %08lX %08lX %04X %04X %6lu %4X\n", + af->type, + af->protocol, + af->low, + af->high, + af->visible, + af->hidden, + ntohl(af->where), + ntohl(af->lastcontact), + af->ctlproto, + af->ctlport, + (af->timer.expires<jiffies ? 
0 : af->timer.expires-jiffies), + af->flags); + + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +static struct proc_dir_entry autofw_proc_entry = { + 0, 0, NULL, + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + autofw_procinfo +}; + +#define proc_ent &autofw_proc_entry +#else /* !CONFIG_PROC_FS */ + +#define proc_ent NULL +#endif + + +#define autofw_in_update NULL +#define autofw_out_rule NULL +#define autofw_mod_init NULL +#define autofw_mod_done NULL + +static struct ip_masq_mod autofw_mod = { + NULL, /* next */ + NULL, /* next_reg */ + "autofw", /* name */ + ATOMIC_INIT(0), /* nent */ + ATOMIC_INIT(0), /* refcnt */ + proc_ent, + autofw_ctl, + autofw_mod_init, + autofw_mod_done, + autofw_in_rule, + autofw_in_update, + autofw_in_create, + autofw_out_rule, + autofw_out_update, + autofw_out_create, +}; + +__initfunc(int ip_autofw_init(void)) +{ + return register_ip_masq_mod ((mmod_self=&autofw_mod)); +} + +int ip_autofw_done(void) +{ + return unregister_ip_masq_mod(&autofw_mod); +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_autofw_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_autofw_done() != 0) + printk(KERN_INFO "ip_autofw_done(): can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_cuseeme.c b/pfinet/linux-src/net/ipv4/ip_masq_cuseeme.c new file mode 100644 index 00000000..9b412baf --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_cuseeme.c @@ -0,0 +1,264 @@ +/* + * IP_MASQ_FTP CUSeeMe masquerading module + * + * + * Version: @(#)$Id: ip_masq_cuseeme.c,v 1.4 1998/10/06 04:48:57 davem Exp $ + * + * Author: Richard Lynch + * + * + * Fixes: + * Richard Lynch : Updated patch to conform to new module + * specifications + * Nigel Metheringham : Multiple port support + * Michael Owings : Fixed broken init code + * Added code to update inbound + * packets with correct local addresses. + * Fixes audio and "chat" problems + * Thanx to the CU-SeeMe Consortium for + * technical docs + * Steven Clarke : Small changes for 2.1 + * + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Multiple Port Support + * The helper can be made to handle up to MAX_MASQ_APP_PORTS (normally 12) + * with the port numbers being defined at module load time. The module + * uses the symbol "ports" to define a list of monitored ports, which can + * be specified on the insmod command line as + * ports=x1,x2,x3... + * where x[n] are integer port numbers. This option can be put into + * /etc/conf.modules (or /etc/modules.conf depending on your config) + * where modload will pick it up should you use modload to load your + * modules. 
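+ *
+ *	For example, to monitor the default port plus one extra one:
+ *		insmod ip_masq_cuseeme.o ports=7648,7649
+ *	(7649 is only an illustration; 7648 is the built-in default.)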
+ * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/udp.h> + +/* #define IP_MASQ_NDEBUG */ +#include <net/ip_masq.h> + +#pragma pack(1) +/* CU-SeeMe Data Header */ +typedef struct { + u_short dest_family; + u_short dest_port; + u_long dest_addr; + short family; + u_short port; + u_long addr; + u_long seq; + u_short msg; + u_short data_type; + u_short packet_len; +} cu_header; + +/* Open Continue Header */ +typedef struct { + cu_header cu_head; + u_short client_count; /* Number of client info structs */ + u_long seq_no; + char user_name[20]; + char stuff[4]; /* flags, version stuff, etc */ +}oc_header; + +/* client info structures */ +typedef struct { + u_long address; /* Client address */ + char stuff[8]; /* Flags, pruning bitfield, packet counts etc */ +} client_info; +#pragma pack() + +/* + * List of ports (up to MAX_MASQ_APP_PORTS) to be handled by helper + * First port is set to the default port. + */ +static int ports[MAX_MASQ_APP_PORTS] = {7648}; /* I rely on the trailing items being set to zero */ +struct ip_masq_app *masq_incarnations[MAX_MASQ_APP_PORTS]; + +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); + +static int +masq_cuseeme_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int +masq_cuseeme_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + return 0; +} + +int +masq_cuseeme_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + struct udphdr *uh = (struct udphdr *)&(((char *)iph)[iph->ihl*4]); + cu_header *cu_head; + char *data=(char *)&uh[1]; + + if (skb->len - ((unsigned char *) data - skb->h.raw) >= sizeof(cu_header)) + { + cu_head = (cu_header *) data; + /* cu_head->port = ms->mport; */ + if( cu_head->addr ) + cu_head->addr = (u_long) maddr; + if(ntohs(cu_head->data_type) == 257) + IP_MASQ_DEBUG(1-debug, "Sending talk packet!\n"); + } + return 0; +} + +int +masq_cuseeme_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb = *skb_p; + struct iphdr *iph = skb->nh.iph; + struct udphdr *uh = (struct udphdr *)&(((char *)iph)[iph->ihl*4]); + cu_header *cu_head; + oc_header *oc; + client_info *ci; + char *data=(char *)&uh[1]; + u_short len = skb->len - ((unsigned char *) data - skb->h.raw); + int i, off; + + if (len >= sizeof(cu_header)) + { + cu_head = (cu_header *) data; + if(cu_head->dest_addr) /* Correct destination address */ + cu_head->dest_addr = (u_long) ms->saddr; + if(ntohs(cu_head->data_type)==101 && len > sizeof(oc_header)) + { + oc = (oc_header * ) data; + /* Spin (grovel) thru client_info structs till we find our own */ + off=sizeof(oc_header); + for(i=0; + (i < oc->client_count && off+sizeof(client_info) <= len); + i++) + { + ci=(client_info *)(data+off); + if(ci->address==(u_long) maddr) + { + /* Update w/ our real ip address and exit */ + ci->address = (u_long) ms->saddr; + break; + } + else + off+=sizeof(client_info); + } + } + } + return 0; +} + +struct ip_masq_app ip_masq_cuseeme = { + NULL, /* next */ + "cuseeme", + 0, /* type */ + 0, /* n_attach */ + 
masq_cuseeme_init_1, /* ip_masq_init_1 */ + masq_cuseeme_done_1, /* ip_masq_done_1 */ + masq_cuseeme_out, /* pkt_out */ + masq_cuseeme_in /* pkt_in */ +}; + + +/* + * ip_masq_cuseeme initialization + */ + +__initfunc(int ip_masq_cuseeme_init(void)) +{ + int i, j; + + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (ports[i]) { + if ((masq_incarnations[i] = kmalloc(sizeof(struct ip_masq_app), + GFP_KERNEL)) == NULL) + return -ENOMEM; + memcpy(masq_incarnations[i], &ip_masq_cuseeme, sizeof(struct ip_masq_app)); + if ((j = register_ip_masq_app(masq_incarnations[i], + IPPROTO_UDP, + ports[i]))) { + return j; + } +#if DEBUG_CONFIG_IP_MASQ_CUSEEME + IP_MASQ_DEBUG(1-debug, "CuSeeMe: loaded support on port[%d] = %d\n", + i, ports[i]); +#endif + } else { + /* To be safe, force the incarnation table entry to NULL */ + masq_incarnations[i] = NULL; + } + } + return 0; +} + +/* + * ip_masq_cuseeme fin. + */ + +int ip_masq_cuseeme_done(void) +{ + int i, j, k; + + k=0; + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (masq_incarnations[i]) { + if ((j = unregister_ip_masq_app(masq_incarnations[i]))) { + k = j; + } else { + kfree(masq_incarnations[i]); + masq_incarnations[i] = NULL; + IP_MASQ_DEBUG(1-debug, "CuSeeMe: unloaded support on port[%d] = %d\n", i, ports[i]); + } + } + } + return k; +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_cuseeme_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_cuseeme_done() != 0) + IP_MASQ_DEBUG(1-debug, "ip_masq_cuseeme: can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_ftp.c b/pfinet/linux-src/net/ipv4/ip_masq_ftp.c new file mode 100644 index 00000000..35d1f544 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_ftp.c @@ -0,0 +1,393 @@ +/* + * IP_MASQ_FTP ftp masquerading module + * + * + * Version: @(#)ip_masq_ftp.c 0.04 02/05/96 + * + * Author: Wouter Gadeyne + * + * + * Fixes: + * Wouter Gadeyne : Fixed masquerading support of ftp PORT commands + * Juan Jose Ciarlante : Code moved and adapted from ip_fw.c + * Keith Owens : Add keep alive for ftp control channel + * Nigel Metheringham : Added multiple port support + * Juan Jose Ciarlante : Use control_add() for ftp control chan + * Juan Jose Ciarlante : Litl bits for 2.1 + * Juan Jose Ciarlante : use ip_masq_listen() + * Juan Jose Ciarlante : use private app_data for own flag(s) + * + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Multiple Port Support + * The helper can be made to handle up to MAX_MASQ_APP_PORTS (normally 12) + * with the port numbers being defined at module load time. The module + * uses the symbol "ports" to define a list of monitored ports, which can + * be specified on the insmod command line as + * ports=x1,x2,x3... + * where x[n] are integer port numbers. This option can be put into + * /etc/conf.modules (or /etc/modules.conf depending on your config) + * where modload will pick it up should you use modload to load your + * modules. 
+ * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/tcp.h> + +/* #define IP_MASQ_NDEBUG */ +#include <net/ip_masq.h> + + +/* + * List of ports (up to MAX_MASQ_APP_PORTS) to be handled by helper + * First port is set to the default port. + */ +static int ports[MAX_MASQ_APP_PORTS] = {21}; /* I rely on the trailing items being set to zero */ +struct ip_masq_app *masq_incarnations[MAX_MASQ_APP_PORTS]; + +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); + +/* Dummy variable */ +static int masq_ftp_pasv; + +static int +masq_ftp_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int +masq_ftp_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + return 0; +} + +int +masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *p, *data, *data_limit; + unsigned char p1,p2,p3,p4,p5,p6; + __u32 from; + __u16 port; + struct ip_masq *n_ms; + char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ + unsigned buf_len; + int diff; + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + + data_limit = skb->h.raw + skb->len - 18; + if (skb->len >= 6 && (memcmp(data, "PASV\r\n", 6) == 0 || memcmp(data, "pasv\r\n", 6) == 0)) + ms->app_data = &masq_ftp_pasv; + + while (data < data_limit) + { + if (memcmp(data,"PORT ",5) && memcmp(data,"port ",5)) + { + data ++; + continue; + } + p = data+5; + p1 = simple_strtoul(data+5,&data,10); + if (*data!=',') + continue; + p2 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p3 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p4 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p5 = simple_strtoul(data+1,&data,10); + if (*data!=',') + continue; + p6 = simple_strtoul(data+1,&data,10); + if (*data!='\r' && *data!='\n') + continue; + + from = (p1<<24) | (p2<<16) | (p3<<8) | p4; + port = (p5<<8) | p6; + + IP_MASQ_DEBUG(1-debug, "PORT %X:%X detected\n",from,port); + + /* + * Now update or create an masquerade entry for it + */ + + IP_MASQ_DEBUG(1-debug, "protocol %d %lX:%X %X:%X\n", iph->protocol, htonl(from), htons(port), iph->daddr, 0); + + n_ms = ip_masq_out_get(iph->protocol, + htonl(from), htons(port), + iph->daddr, 0); + if (!n_ms) { + n_ms = ip_masq_new(IPPROTO_TCP, + maddr, 0, + htonl(from), htons(port), + iph->daddr, 0, + IP_MASQ_F_NO_DPORT); + + if (n_ms==NULL) + return 0; + ip_masq_control_add(n_ms, ms); + } + + /* + * Replace the old PORT with the new one + */ + from = ntohl(n_ms->maddr); + port = ntohs(n_ms->mport); + sprintf(buf,"%d,%d,%d,%d,%d,%d", + from>>24&255,from>>16&255,from>>8&255,from&255, + port>>8&255,port&255); + buf_len = strlen(buf); + + IP_MASQ_DEBUG(1-debug, "new PORT %X:%X\n",from,port); + + /* + * Calculate required delta-offset to keep TCP happy + */ + + diff = buf_len - (data-p); + + /* + * No shift. 
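+		 *	(If diff != 0 the segment is resized through
+		 *	ip_masq_skb_replace() below, and the diff returned by
+		 *	this helper is fed to masq_seq_update() by
+		 *	ip_masq_app_pkt_out() so later seq/ack numbers can be
+		 *	corrected.)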
+ */ + + if (diff==0) { + /* + * simple case, just replace the old PORT cmd + */ + memcpy(p,buf,buf_len); + } else { + + *skb_p = ip_masq_skb_replace(skb, GFP_ATOMIC, p, data-p, buf, buf_len); + } + /* + * Move tunnel to listen state + */ + ip_masq_listen(n_ms); + ip_masq_put(n_ms); + + return diff; + + } + return 0; + +} + +/* + * Look at incoming ftp packets to catch the response to a PASV command. When + * we see one we build a masquerading entry for the client address, client port + * 0 (unknown at the moment), the server address and the server port. Mark the + * current masquerade entry as a control channel and point the new entry at the + * control entry. All this work just for ftp keepalive across masquerading. + * + * The incoming packet should be something like + * "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)". + * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. + * ncftp 2.3.0 cheats by skipping the leading number then going 22 bytes into + * the data so we do the same. If it's good enough for ncftp then it's good + * enough for me. + * + * In this case, the client is the source machine being masqueraded, the server + * is the destination for ftp requests. It all depends on your point of view ... + */ + +int +masq_ftp_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *data, *data_limit; + unsigned char p1,p2,p3,p4,p5,p6; + __u32 to; + __u16 port; + struct ip_masq *n_ms; + + if (ms->app_data != &masq_ftp_pasv) + return 0; /* quick exit if no outstanding PASV */ + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + data_limit = skb->h.raw + skb->len; + + while (data < data_limit && *data != ' ') + ++data; + while (data < data_limit && *data == ' ') + ++data; + data += 22; + if (data >= data_limit || *data != '(') + return 0; + p1 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ',') + return 0; + p2 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ',') + return 0; + p3 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ',') + return 0; + p4 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ',') + return 0; + p5 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ',') + return 0; + p6 = simple_strtoul(data+1, &data, 10); + if (data >= data_limit || *data != ')') + return 0; + + to = (p1<<24) | (p2<<16) | (p3<<8) | p4; + port = (p5<<8) | p6; + + /* + * Now update or create an masquerade entry for it + */ + IP_MASQ_DEBUG(1-debug, "PASV response %lX:%X %X:%X detected\n", ntohl(ms->saddr), 0, to, port); + + n_ms = ip_masq_out_get(iph->protocol, + ms->saddr, 0, + htonl(to), htons(port)); + if (!n_ms) { + n_ms = ip_masq_new(IPPROTO_TCP, + maddr, 0, + ms->saddr, 0, + htonl(to), htons(port), + IP_MASQ_F_NO_SPORT); + + if (n_ms==NULL) + return 0; + ip_masq_control_add(n_ms, ms); + } + +#if 0 /* v0.12 state processing */ + + /* + * keep for a bit longer than tcp_fin, client may not issue open + * to server port before tcp_fin_timeout. 
+ */ + n_ms->timeout = ip_masq_expire->tcp_fin_timeout*3; +#endif + ms->app_data = NULL; + ip_masq_put(n_ms); + + return 0; /* no diff required for incoming packets, thank goodness */ +} + +struct ip_masq_app ip_masq_ftp = { + NULL, /* next */ + "ftp", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_ftp_init_1, /* ip_masq_init_1 */ + masq_ftp_done_1, /* ip_masq_done_1 */ + masq_ftp_out, /* pkt_out */ + masq_ftp_in, /* pkt_in */ +}; + +/* + * ip_masq_ftp initialization + */ + +__initfunc(int ip_masq_ftp_init(void)) +{ + int i, j; + + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (ports[i]) { + if ((masq_incarnations[i] = kmalloc(sizeof(struct ip_masq_app), + GFP_KERNEL)) == NULL) + return -ENOMEM; + memcpy(masq_incarnations[i], &ip_masq_ftp, sizeof(struct ip_masq_app)); + if ((j = register_ip_masq_app(masq_incarnations[i], + IPPROTO_TCP, + ports[i]))) { + return j; + } + IP_MASQ_DEBUG(1-debug, "Ftp: loaded support on port[%d] = %d\n", + i, ports[i]); + } else { + /* To be safe, force the incarnation table entry to NULL */ + masq_incarnations[i] = NULL; + } + } + return 0; +} + +/* + * ip_masq_ftp fin. + */ + +int ip_masq_ftp_done(void) +{ + int i, j, k; + + k=0; + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (masq_incarnations[i]) { + if ((j = unregister_ip_masq_app(masq_incarnations[i]))) { + k = j; + } else { + kfree(masq_incarnations[i]); + masq_incarnations[i] = NULL; + IP_MASQ_DEBUG(1-debug, "Ftp: unloaded support on port[%d] = %d\n", + i, ports[i]); + } + } + } + return k; +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_ftp_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_ftp_done() != 0) + printk(KERN_INFO "ip_masq_ftp: can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_irc.c b/pfinet/linux-src/net/ipv4/ip_masq_irc.c new file mode 100644 index 00000000..e52a5720 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_irc.c @@ -0,0 +1,345 @@ +/* + * IP_MASQ_IRC irc masquerading module + * + * + * Version: @(#)ip_masq_irc.c 0.04 99/06/19 + * + * Author: Juan Jose Ciarlante + * + * Additions: + * - recognize a few non-irc-II DCC requests (Oliver Wagner) + * DCC MOVE (AmIRC/DCC.MOVE; SEND with resuming) + * DCC SCHAT (AmIRC IDEA encrypted CHAT) + * DCC TSEND (AmIRC/PIRCH SEND without ACKs) + * Fixes: + * Juan Jose Ciarlante : set NO_DADDR flag in ip_masq_new() + * Nigel Metheringham : Added multiple port support + * Juan Jose Ciarlante : litl bits for 2.1 + * Oliver Wagner : more IRC cmds processing + * <winmute@lucifer.gv.kotnet.org> + * Juan Jose Ciarlante : put new ms entry to listen() + * Scottie Shore : added support for clients that add extra args + * <sshore@escape.ca> + * + * FIXME: + * - detect also previous "PRIVMSG" string ?. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Multiple Port Support + * The helper can be made to handle up to MAX_MASQ_APP_PORTS (normally 12) + * with the port numbers being defined at module load time. The module + * uses the symbol "ports" to define a list of monitored ports, which can + * be specified on the insmod command line as + * ports=x1,x2,x3... + * where x[n] are integer port numbers. 
This option can be put into + * /etc/conf.modules (or /etc/modules.conf depending on your config) + * where modload will pick it up should you use modload to load your + * modules. + * + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/types.h> +#include <linux/kernel.h> +#include <asm/system.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/ip_masq.h> + + +/* + * List of ports (up to MAX_MASQ_APP_PORTS) to be handled by helper + * First port is set to the default port. + */ +int ports[MAX_MASQ_APP_PORTS] = {6667}; /* I rely on the trailing items being set to zero */ +struct ip_masq_app *masq_incarnations[MAX_MASQ_APP_PORTS]; +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); + + +/* + * List of supported DCC protocols + */ + +#define NUM_DCCPROTO 5 + +struct dccproto +{ + char *match; + int matchlen; +}; + +struct dccproto dccprotos[NUM_DCCPROTO] = { + { "SEND ", 5 }, + { "CHAT ", 5 }, + { "MOVE ", 5 }, + { "TSEND ", 6 }, + { "SCHAT ", 6 } +}; +#define MAXMATCHLEN 6 + +static int +masq_irc_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int +masq_irc_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + return 0; +} + +int +masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *data, *data_limit; + __u32 s_addr; + __u16 s_port; + struct ip_masq *n_ms; + char buf[20]; /* "m_addr m_port" (dec base)*/ + unsigned buf_len; + int diff; + char *dcc_p, *addr_beg_p, *addr_end_p; + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + + /* + * Hunt irc DCC string, the _shortest_: + * + * strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 + * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 + * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27 + * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits) + * P: bound port (min 1 d ) + * F: filename (min 1 d ) + * S: size (min 1 d ) + * 0x01, \n: terminators + */ + + data_limit = skb->h.raw + skb->len; + + while (data < (data_limit - ( 22 + MAXMATCHLEN ) ) ) + { + int i; + if (memcmp(data,"\1DCC ",5)) { + data ++; + continue; + } + + dcc_p = data; + data += 5; /* point to DCC cmd */ + + for(i=0; i<NUM_DCCPROTO; i++) + { + /* + * go through the table and hunt a match string + */ + + if( memcmp(data, dccprotos[i].match, dccprotos[i].matchlen ) == 0 ) + { + data += dccprotos[i].matchlen; + + /* + * skip next string. + */ + + while( *data++ != ' ') + + /* + * must still parse, at least, "AAAAAAAA P\1\n", + * 12 bytes left. + */ + if (data > (data_limit-12)) return 0; + + + addr_beg_p = data; + + /* + * client bound address in dec base + */ + + s_addr = simple_strtoul(data,&data,10); + if (*data++ !=' ') + continue; + + /* + * client bound port in dec base + */ + + s_port = simple_strtoul(data,&data,10); + addr_end_p = data; + + /* + * Now create an masquerade entry for it + * must set NO_DPORT and NO_DADDR because + * connection is requested by another client. 
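+			 *	e.g. an outgoing "\1DCC SEND f 3232235778 1024\1"
+			 *	advertises the client's own address and port in
+			 *	decimal; the sprintf() below substitutes
+			 *	ntohl(n_ms->maddr) and ntohs(n_ms->mport) so the
+			 *	peer connects to the masquerading host instead
+			 *	(3232235778 is just an illustrative address).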
+ */ + + n_ms = ip_masq_new(IPPROTO_TCP, + maddr, 0, + htonl(s_addr),htons(s_port), + 0, 0, + IP_MASQ_F_NO_DPORT|IP_MASQ_F_NO_DADDR); + if (n_ms==NULL) + return 0; + + /* + * Replace the old "address port" with the new one + */ + + buf_len = sprintf(buf,"%lu %u", + ntohl(n_ms->maddr),ntohs(n_ms->mport)); + + /* + * Calculate required delta-offset to keep TCP happy + */ + + diff = buf_len - (addr_end_p-addr_beg_p); + + *addr_beg_p = '\0'; + IP_MASQ_DEBUG(1-debug, "masq_irc_out(): '%s' %X:%X detected (diff=%d)\n", dcc_p, s_addr,s_port, diff); + + /* + * No shift. + */ + + if (diff==0) { + /* + * simple case, just copy. + */ + memcpy(addr_beg_p,buf,buf_len); + } else { + + *skb_p = ip_masq_skb_replace(skb, GFP_ATOMIC, + addr_beg_p, addr_end_p-addr_beg_p, + buf, buf_len); + } + ip_masq_listen(n_ms); + ip_masq_put(n_ms); + return diff; + } + } + } + return 0; + +} + +/* + * Main irc object + * You need 1 object per port in case you need + * to offer also other used irc ports (6665,6666,etc), + * they will share methods but they need own space for + * data. + */ + +struct ip_masq_app ip_masq_irc = { + NULL, /* next */ + "irc", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_irc_init_1, /* init_1 */ + masq_irc_done_1, /* done_1 */ + masq_irc_out, /* pkt_out */ + NULL /* pkt_in */ +}; + +/* + * ip_masq_irc initialization + */ + +__initfunc(int ip_masq_irc_init(void)) +{ + int i, j; + + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (ports[i]) { + if ((masq_incarnations[i] = kmalloc(sizeof(struct ip_masq_app), + GFP_KERNEL)) == NULL) + return -ENOMEM; + memcpy(masq_incarnations[i], &ip_masq_irc, sizeof(struct ip_masq_app)); + if ((j = register_ip_masq_app(masq_incarnations[i], + IPPROTO_TCP, + ports[i]))) { + return j; + } + IP_MASQ_DEBUG(1-debug, + "Irc: loaded support on port[%d] = %d\n", + i, ports[i]); + } else { + /* To be safe, force the incarnation table entry to NULL */ + masq_incarnations[i] = NULL; + } + } + return 0; +} + +/* + * ip_masq_irc fin. 
+ */ + +int ip_masq_irc_done(void) +{ + int i, j, k; + + k=0; + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (masq_incarnations[i]) { + if ((j = unregister_ip_masq_app(masq_incarnations[i]))) { + k = j; + } else { + kfree(masq_incarnations[i]); + masq_incarnations[i] = NULL; + IP_MASQ_DEBUG(1-debug, "Irc: unloaded support on port[%d] = %d\n", + i, ports[i]); + } + } + } + return k; +} + + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_irc_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_irc_done() != 0) + printk(KERN_INFO "ip_masq_irc: can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_mfw.c b/pfinet/linux-src/net/ipv4/ip_masq_mfw.c new file mode 100644 index 00000000..60c77970 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_mfw.c @@ -0,0 +1,769 @@ +/* + * IP_MASQ_MARKFW masquerading module + * + * Does (reverse-masq) forwarding based on skb->fwmark value + * + * $Id: ip_masq_mfw.c,v 1.3.2.1 1999/07/02 10:10:03 davem Exp $ + * + * Author: Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar> + * based on Steven Clarke's portfw + * + * Fixes: + * JuanJo Ciarlante: added u-space sched support + * JuanJo Ciarlante: if rport==0, use packet dest port *grin* + * JuanJo Ciarlante: fixed tcp syn&&!ack creation + * + * + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <net/ip.h> +#include <linux/ip_fw.h> +#include <linux/ip_masq.h> +#include <net/ip_masq.h> +#include <net/ip_masq_mod.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <asm/softirq.h> +#include <asm/spinlock.h> +#include <asm/atomic.h> + +static struct ip_masq_mod *mmod_self = NULL; +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +/* + * Lists structure: + * There is a "main" linked list with entries hashed + * by fwmark value (struct ip_masq_mfw, the "m-entries"). + * + * Each of this m-entry holds a double linked list + * of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host"), + * the round-robin scheduling takes place by rotating m.host entries + * "inside" its m-entry. + */ + +/* + * Each forwarded host (addr:port) is stored here + */ +struct ip_masq_mfw_host { + struct list_head list; + __u32 addr; + __u16 port; + __u16 pad0; + __u32 fwmark; + int pref; + atomic_t pref_cnt; +}; + +#define IP_MASQ_MFW_HSIZE 16 +/* + * This entries are indexed by fwmark, + * they hold a list of forwarded addr:port + */ + +struct ip_masq_mfw { + struct ip_masq_mfw *next; /* linked list */ + __u32 fwmark; /* key: firewall mark */ + struct list_head hosts; /* list of forward-to hosts */ + atomic_t nhosts; /* number of "" */ + rwlock_t lock; +}; + + +static struct semaphore mfw_sema = MUTEX; +static rwlock_t mfw_lock = RW_LOCK_UNLOCKED; + +static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE]; + +static __inline__ int mfw_hash_val(int fwmark) +{ + return fwmark & 0x0f; +} + +/* + * Get m-entry by "fwmark" + * Caller must lock tables. + */ + +static struct ip_masq_mfw *__mfw_get(int fwmark) +{ + struct ip_masq_mfw* mfw; + int hash = mfw_hash_val(fwmark); + + for (mfw=ip_masq_mfw_table[hash];mfw;mfw=mfw->next) { + if (mfw->fwmark==fwmark) { + goto out; + } + } +out: + return mfw; +} + +/* + * Links m-entry. + * Caller should have checked if already present for same fwmark + * + * Caller must lock tables. 
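+ *
+ *	(New entries go at the head of the hash chain selected by
+ *	mfw_hash_val(), i.e. fwmark & 0x0f; marks sharing the low four
+ *	bits land on the same chain and are told apart by the fwmark
+ *	compare in __mfw_get().)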
+ */ +static int __mfw_add(struct ip_masq_mfw *mfw) +{ + int fwmark = mfw->fwmark; + int hash = mfw_hash_val(fwmark); + + mfw->next = ip_masq_mfw_table[hash]; + ip_masq_mfw_table[hash] = mfw; + ip_masq_mod_inc_nent(mmod_self); + + return 0; +} + +/* + * Creates a m-entry (doesn't link it) + */ + +static struct ip_masq_mfw * mfw_new(int fwmark) +{ + struct ip_masq_mfw *mfw; + + mfw = kmalloc(sizeof(*mfw), GFP_KERNEL); + if (mfw == NULL) + goto out; + + MOD_INC_USE_COUNT; + memset(mfw, 0, sizeof(*mfw)); + mfw->fwmark = fwmark; + mfw->lock = RW_LOCK_UNLOCKED; + + INIT_LIST_HEAD(&mfw->hosts); +out: + return mfw; +} + +static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu) +{ + mu->raddr = h->addr; + mu->rport = h->port; + mu->fwmark = h->fwmark; + mu->pref = h->pref; +} + +/* + * Creates a m.host (doesn't link it in a m-entry) + */ +static struct ip_masq_mfw_host * mfw_host_new(struct ip_mfw_user *mu) +{ + struct ip_masq_mfw_host * mfw_host; + mfw_host = kmalloc(sizeof (*mfw_host), GFP_KERNEL); + if (!mfw_host) + return NULL; + + MOD_INC_USE_COUNT; + memset(mfw_host, 0, sizeof(*mfw_host)); + mfw_host->addr = mu->raddr; + mfw_host->port = mu->rport; + mfw_host->fwmark = mu->fwmark; + mfw_host->pref = mu->pref; + atomic_set(&mfw_host->pref_cnt, mu->pref); + + return mfw_host; +} + +/* + * Create AND link m.host to m-entry. + * It locks m.lock. + */ +static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail) +{ + struct ip_masq_mfw_host *mfw_host; + + mfw_host = mfw_host_new(mu); + if (!mfw_host) + return -ENOMEM; + + write_lock_bh(&mfw->lock); + list_add(&mfw_host->list, attail? mfw->hosts.prev : &mfw->hosts); + atomic_inc(&mfw->nhosts); + write_unlock_bh(&mfw->lock); + + return 0; +} + +/* + * Unlink AND destroy m.host(s) from m-entry. + * Wildcard (nul host or addr) ok. + * It uses m.lock. + */ +static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu) +{ + + struct list_head *l,*e; + struct ip_masq_mfw_host *h; + int n_del = 0; + l = &mfw->hosts; + + write_lock_bh(&mfw->lock); + for (e=l->next; e!=l; e=e->next) + { + h = list_entry(e, struct ip_masq_mfw_host, list); + if ((!mu->raddr || h->addr == mu->raddr) && + (!mu->rport || h->port == mu->rport)) { + /* HIT */ + atomic_dec(&mfw->nhosts); + list_del(&h->list); + kfree_s(h, sizeof(*h)); + MOD_DEC_USE_COUNT; + n_del++; + } + + } + write_unlock_bh(&mfw->lock); + return n_del? 0 : -ESRCH; +} + +/* + * Changes m.host parameters + * Wildcards ok + * + * Caller must lock tables. + */ +static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu) +{ + + struct list_head *l,*e; + struct ip_masq_mfw_host *h; + int n_edit = 0; + l = &mfw->hosts; + + for (e=l->next; e!=l; e=e->next) + { + h = list_entry(e, struct ip_masq_mfw_host, list); + if ((!mu->raddr || h->addr == mu->raddr) && + (!mu->rport || h->port == mu->rport)) { + /* HIT */ + h->pref = mu->pref; + atomic_set(&h->pref_cnt, mu->pref); + n_edit++; + } + + } + return n_edit? 0 : -ESRCH; +} + +/* + * Destroys m-entry. + * Caller must have checked that it doesn't hold any m.host(s) + */ +static void mfw_destroy(struct ip_masq_mfw *mfw) +{ + kfree_s(mfw, sizeof(*mfw)); + MOD_DEC_USE_COUNT; +} + +/* + * Unlink m-entry. + * + * Caller must lock tables. 
+ */ +static int __mfw_del(struct ip_masq_mfw *mfw) +{ + struct ip_masq_mfw **mfw_p; + int ret = -EINVAL; + + + for(mfw_p=&ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)]; + *mfw_p; + mfw_p = &((*mfw_p)->next)) + { + if (mfw==(*mfw_p)) { + *mfw_p = mfw->next; + ip_masq_mod_dec_nent(mmod_self); + ret = 0; + goto out; + } + } +out: + return ret; +} + +/* + * Crude m.host scheduler + * This interface could be exported to allow playing with + * other sched policies. + * + * Caller must lock m-entry. + */ +static struct ip_masq_mfw_host * __mfw_sched(struct ip_masq_mfw *mfw, int force) +{ + struct ip_masq_mfw_host *h = NULL; + + if (atomic_read(&mfw->nhosts) == 0) + goto out; + + /* + * Here resides actual sched policy: + * When pref_cnt touches 0, entry gets shifted to tail and + * its pref_cnt reloaded from h->pref (actual value + * passed from u-space). + * + * Exception is pref==0: avoid scheduling. + */ + + h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list); + + if (atomic_read(&mfw->nhosts) <= 1) + goto out; + + if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) { + atomic_set(&h->pref_cnt, h->pref); + list_del(&h->list); + list_add(&h->list, mfw->hosts.prev); + } +out: + return h; +} + +/* + * Main lookup routine. + * HITs fwmark and schedules m.host entries if required + */ +static struct ip_masq_mfw_host * mfw_lookup(int fwmark) +{ + struct ip_masq_mfw *mfw; + struct ip_masq_mfw_host *h = NULL; + + read_lock(&mfw_lock); + mfw = __mfw_get(fwmark); + + if (mfw) { + write_lock(&mfw->lock); + h = __mfw_sched(mfw, 0); + write_unlock(&mfw->lock); + } + + read_unlock(&mfw_lock); + return h; +} + +#ifdef CONFIG_PROC_FS +static int mfw_procinfo(char *buffer, char **start, off_t offset, + int length, int dummy) +{ + struct ip_masq_mfw *mfw; + struct ip_masq_mfw_host *h; + struct list_head *l,*e; + off_t pos=0, begin; + char temp[129]; + int idx = 0; + int len=0; + + MOD_INC_USE_COUNT; + + IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n"); + + if (offset < 64) + { + sprintf(temp, "FwMark > RAddr RPort PrCnt Pref"); + len = sprintf(buffer, "%-63s\n", temp); + } + pos = 64; + + for(idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++) + { + read_lock(&mfw_lock); + for(mfw = ip_masq_mfw_table[idx]; mfw ; mfw = mfw->next) + { + read_lock_bh(&mfw->lock); + l=&mfw->hosts; + + for(e=l->next;l!=e;e=e->next) { + h = list_entry(e, struct ip_masq_mfw_host, list); + pos += 64; + if (pos <= offset) { + len = 0; + continue; + } + + sprintf(temp,"0x%x > %08lX %5u %5d %5d", + h->fwmark, + ntohl(h->addr), ntohs(h->port), + atomic_read(&h->pref_cnt), h->pref); + len += sprintf(buffer+len, "%-63s\n", temp); + + if(len >= length) { + read_unlock_bh(&mfw->lock); + read_unlock(&mfw_lock); + goto done; + } + } + read_unlock_bh(&mfw->lock); + } + read_unlock(&mfw_lock); + } + +done: + + if (len) { + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + } + if(len>length) + len = length; + MOD_DEC_USE_COUNT; + return len; +} +static struct proc_dir_entry mfw_proc_entry = { +/* 0, 0, NULL", */ + 0, 3, "mfw", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + mfw_procinfo +}; + +#define proc_ent &mfw_proc_entry +#else /* !CONFIG_PROC_FS */ + +#define proc_ent NULL +#endif + + +static void mfw_flush(void) +{ + struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE]; + struct ip_masq_mfw_host *h; + struct ip_masq_mfw *mfw_next; + int idx; + struct list_head *l,*e; + + write_lock_bh(&mfw_lock); + memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table); + memset(ip_masq_mfw_table, 
0, sizeof ip_masq_mfw_table); + write_unlock_bh(&mfw_lock); + + /* + * For every hash table row ... + */ + for(idx=0;idx<IP_MASQ_MFW_HSIZE;idx++) { + + /* + * For every m-entry in row ... + */ + for(mfw=local_table[idx];mfw;mfw=mfw_next) { + /* + * For every m.host in m-entry ... + */ + l=&mfw->hosts; + while((e=l->next) != l) { + h = list_entry(e, struct ip_masq_mfw_host, list); + atomic_dec(&mfw->nhosts); + list_del(&h->list); + kfree_s(h, sizeof(*h)); + MOD_DEC_USE_COUNT; + } + + if (atomic_read(&mfw->nhosts)) { + IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n", + atomic_read(&mfw->nhosts)); + } + mfw_next = mfw->next; + kfree_s(mfw, sizeof(*mfw)); + MOD_DEC_USE_COUNT; + ip_masq_mod_dec_nent(mmod_self); + } + } +} + +/* + * User space control entry point + */ +static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen) +{ + struct ip_mfw_user *mu = &mctl->u.mfw_user; + struct ip_masq_mfw *mfw; + int ret = EINVAL; + int arglen = optlen - IP_MASQ_CTL_BSIZE; + int cmd; + + + IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n", + arglen, + sizeof (*mu), + optlen, + sizeof (*mctl)); + + /* + * checks ... + */ + if (arglen != sizeof(*mu) && optlen != sizeof(*mctl)) + return -EINVAL; + + /* + * Don't trust the lusers - plenty of error checking! + */ + cmd = mctl->m_cmd; + IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n", + cmd, mu->fwmark); + + + switch(cmd) { + case IP_MASQ_CMD_NONE: + return 0; + case IP_MASQ_CMD_FLUSH: + break; + case IP_MASQ_CMD_ADD: + case IP_MASQ_CMD_INSERT: + case IP_MASQ_CMD_SET: + if (mu->fwmark == 0) { + IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n"); + return -EINVAL; + } + if (mu->pref < 0) { + IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n", + mu->pref); + return -EINVAL; + } + break; + } + + + ret = -EINVAL; + + switch(cmd) { + case IP_MASQ_CMD_ADD: + case IP_MASQ_CMD_INSERT: + if (!mu->raddr) { + IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n", + mu->raddr, mu->rport); + goto out; + } + + /* + * Cannot just use mfw_lock because below + * are allocations that can sleep; so + * to assure "new entry" atomic creation + * I use a semaphore. + * + */ + down(&mfw_sema); + + read_lock(&mfw_lock); + mfw = __mfw_get(mu->fwmark); + read_unlock(&mfw_lock); + + /* + * If first host, create m-entry + */ + if (mfw == NULL) { + mfw = mfw_new(mu->fwmark); + if (mfw == NULL) + ret = -ENOMEM; + } + + if (mfw) { + /* + * Put m.host in m-entry. + */ + ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD); + + /* + * If first host, link m-entry to hash table. + * Already protected by global lock. + */ + if (ret == 0 && atomic_read(&mfw->nhosts) == 1) { + write_lock_bh(&mfw_lock); + __mfw_add(mfw); + write_unlock_bh(&mfw_lock); + } + if (atomic_read(&mfw->nhosts) == 0) { + mfw_destroy(mfw); + } + } + + up(&mfw_sema); + + break; + + case IP_MASQ_CMD_DEL: + down(&mfw_sema); + + read_lock(&mfw_lock); + mfw = __mfw_get(mu->fwmark); + read_unlock(&mfw_lock); + + if (mfw) { + ret = mfw_delhost(mfw, mu); + + /* + * Last lease will free + * XXX check logic XXX + */ + if (atomic_read(&mfw->nhosts) == 0) { + write_lock_bh(&mfw_lock); + __mfw_del(mfw); + write_unlock_bh(&mfw_lock); + mfw_destroy(mfw); + } + } else + ret = -ESRCH; + + up(&mfw_sema); + break; + case IP_MASQ_CMD_FLUSH: + + down(&mfw_sema); + mfw_flush(); + up(&mfw_sema); + ret = 0; + break; + case IP_MASQ_CMD_SET: + /* + * No need to semaphorize here, main list is not + * modified. 
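+ *
+ * With IP_MASQ_MFW_SCHED set, a scheduling pass is forced via
+ * __mfw_sched(mfw, 1) and the selected host is copied back to
+ * user space; otherwise the matching hosts are edited in place.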
+ */ + read_lock(&mfw_lock); + + mfw = __mfw_get(mu->fwmark); + if (mfw) { + write_lock_bh(&mfw->lock); + + if (mu->flags & IP_MASQ_MFW_SCHED) { + struct ip_masq_mfw_host *h; + if ((h=__mfw_sched(mfw, 1))) { + mfw_host_to_user(h, mu); + ret = 0; + } + } else { + ret = __mfw_edithost(mfw, mu); + } + + write_unlock_bh(&mfw->lock); + } + + read_unlock(&mfw_lock); + break; + } +out: + + return ret; +} + +/* + * Module stubs called from ip_masq core module + */ + +/* + * Input rule stub, called very early for each incoming packet, + * to see if this module has "interest" in packet. + */ +static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph) +{ + int val; + read_lock(&mfw_lock); + val = ( __mfw_get(skb->fwmark) != 0); + read_unlock(&mfw_lock); + return val; +} + +/* + * Input-create stub, called to allow "custom" masq creation + */ +static struct ip_masq * mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + union ip_masq_tphdr tph; + struct ip_masq *ms = NULL; + struct ip_masq_mfw_host *h = NULL; + + tph.raw = (char*) iph + iph->ihl * 4; + + switch (iph->protocol) { + case IPPROTO_TCP: + /* + * Only open TCP tunnel if SYN+!ACK packet + */ + if (!tph.th->syn && tph.th->ack) + return NULL; + case IPPROTO_UDP: + break; + default: + return NULL; + } + + /* + * If no entry exists in the masquerading table + * and the port is involved + * in port forwarding, create a new masq entry + */ + + if ((h=mfw_lookup(skb->fwmark))) { + ms = ip_masq_new(iph->protocol, + iph->daddr, tph.portp[1], + /* if no redir-port, use packet dest port */ + h->addr, h->port? h->port : tph.portp[1], + iph->saddr, tph.portp[0], + 0); + + if (ms != NULL) + ip_masq_listen(ms); + } + return ms; +} + + +#define mfw_in_update NULL +#define mfw_out_rule NULL +#define mfw_out_create NULL +#define mfw_out_update NULL + +static struct ip_masq_mod mfw_mod = { + NULL, /* next */ + NULL, /* next_reg */ + "mfw", /* name */ + ATOMIC_INIT(0), /* nent */ + ATOMIC_INIT(0), /* refcnt */ + proc_ent, + mfw_ctl, + NULL, /* masq_mod_init */ + NULL, /* masq_mod_done */ + mfw_in_rule, + mfw_in_update, + mfw_in_create, + mfw_out_rule, + mfw_out_update, + mfw_out_create, +}; + + +__initfunc(int ip_mfw_init(void)) +{ + return register_ip_masq_mod ((mmod_self=&mfw_mod)); +} + +int ip_mfw_done(void) +{ + return unregister_ip_masq_mod(&mfw_mod); +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_mfw_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_mfw_done() != 0) + printk(KERN_INFO "can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_mod.c b/pfinet/linux-src/net/ipv4/ip_masq_mod.c new file mode 100644 index 00000000..b99502f3 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_mod.c @@ -0,0 +1,322 @@ +/* + * IP_MASQ_MOD masq modules support + * + * + * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> + * + * $Id: ip_masq_mod.c,v 1.5.2.1 1999/07/02 10:10:03 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * Cyrus Durgin: fixed kerneld stuff for kmod. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <net/ip_masq.h> +#include <net/ip_masq_mod.h> + +#include <linux/ip_masq.h> +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif + +EXPORT_SYMBOL(register_ip_masq_mod); +EXPORT_SYMBOL(unregister_ip_masq_mod); +EXPORT_SYMBOL(ip_masq_mod_lkp_link); +EXPORT_SYMBOL(ip_masq_mod_lkp_unlink); + +static spinlock_t masq_mod_lock = SPIN_LOCK_UNLOCKED; + +/* + * Base pointer for registered modules + */ +struct ip_masq_mod * ip_masq_mod_reg_base = NULL; + +/* + * Base pointer for lookup (subset of above, a module could be + * registered, but it could have no active rule); will avoid + * unnecessary lookups. + */ +struct ip_masq_mod * ip_masq_mod_lkp_base = NULL; + +int ip_masq_mod_register_proc(struct ip_masq_mod *mmod) +{ +#ifdef CONFIG_PROC_FS + int ret; + + struct proc_dir_entry *ent = mmod->mmod_proc_ent; + + if (!ent) + return 0; + if (!ent->name) { + ent->name = mmod->mmod_name; + ent->namelen = strlen (mmod->mmod_name); + } + ret = ip_masq_proc_register(ent); + if (ret) mmod->mmod_proc_ent = NULL; + + return ret; +#else + return 0; +#endif +} + +void ip_masq_mod_unregister_proc(struct ip_masq_mod *mmod) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent = mmod->mmod_proc_ent; + if (!ent) + return; + ip_masq_proc_unregister(ent); +#endif +} + +/* + * Link/unlink object for lookups + */ + +int ip_masq_mod_lkp_unlink(struct ip_masq_mod *mmod) +{ + struct ip_masq_mod **mmod_p; + + write_lock_bh(&masq_mod_lock); + + for (mmod_p = &ip_masq_mod_lkp_base; *mmod_p ; mmod_p = &(*mmod_p)->next) + if (mmod == (*mmod_p)) { + *mmod_p = mmod->next; + mmod->next = NULL; + write_unlock_bh(&masq_mod_lock); + return 0; + } + + write_unlock_bh(&masq_mod_lock); + return -EINVAL; +} + +int ip_masq_mod_lkp_link(struct ip_masq_mod *mmod) +{ + write_lock_bh(&masq_mod_lock); + + mmod->next = ip_masq_mod_lkp_base; + ip_masq_mod_lkp_base=mmod; + + write_unlock_bh(&masq_mod_lock); + return 0; +} + +int register_ip_masq_mod(struct ip_masq_mod *mmod) +{ + if (!mmod) { + IP_MASQ_ERR("register_ip_masq_mod(): NULL arg\n"); + return -EINVAL; + } + if (!mmod->mmod_name) { + IP_MASQ_ERR("register_ip_masq_mod(): NULL mmod_name\n"); + return -EINVAL; + } + ip_masq_mod_register_proc(mmod); + + mmod->next_reg = ip_masq_mod_reg_base; + ip_masq_mod_reg_base=mmod; + + return 0; +} + +int unregister_ip_masq_mod(struct ip_masq_mod *mmod) +{ + struct ip_masq_mod **mmod_p; + + if (!mmod) { + IP_MASQ_ERR( "unregister_ip_masq_mod(): NULL arg\n"); + return -EINVAL; + } + + /* + * Only allow unregistration if it is not referenced + */ + if (atomic_read(&mmod->refcnt)) { + IP_MASQ_ERR( "unregister_ip_masq_mod(): is in use by %d guys. 
failed\n", + atomic_read(&mmod->refcnt)); + return -EINVAL; + } + + /* + * Must be already unlinked from lookup list + */ + if (mmod->next) { + IP_MASQ_WARNING("MASQ: unregistering \"%s\" while in lookup list.fixed.", + mmod->mmod_name); + ip_masq_mod_lkp_unlink(mmod); + } + + for (mmod_p = &ip_masq_mod_reg_base; *mmod_p ; mmod_p = &(*mmod_p)->next_reg) + if (mmod == (*mmod_p)) { + ip_masq_mod_unregister_proc(mmod); + *mmod_p = mmod->next_reg; + return 0; + } + + IP_MASQ_ERR("unregister_ip_masq_mod(%s): not linked \n", mmod->mmod_name); + return -EINVAL; +} + +int ip_masq_mod_in_rule(const struct sk_buff *skb, const struct iphdr *iph) +{ + struct ip_masq_mod *mmod; + int ret = IP_MASQ_MOD_NOP; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_in_rule) continue; + switch (ret=mmod->mmod_in_rule(skb, iph)) { + case IP_MASQ_MOD_NOP: + continue; + case IP_MASQ_MOD_ACCEPT: + case IP_MASQ_MOD_REJECT: + goto out; + } + } +out: + return ret; +} + +int ip_masq_mod_out_rule(const struct sk_buff *skb, const struct iphdr *iph) +{ + struct ip_masq_mod *mmod; + int ret = IP_MASQ_MOD_NOP; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_out_rule) continue; + switch (ret=mmod->mmod_out_rule(skb, iph)) { + case IP_MASQ_MOD_NOP: + continue; + case IP_MASQ_MOD_ACCEPT: + case IP_MASQ_MOD_REJECT: + goto out; + } + } +out: + return ret; +} + +struct ip_masq * ip_masq_mod_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + struct ip_masq_mod *mmod; + struct ip_masq *ms = NULL; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_in_create) continue; + if ((ms=mmod->mmod_in_create(skb, iph, maddr))) { + goto out; + } + } +out: + return ms; +} + +struct ip_masq * ip_masq_mod_out_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + struct ip_masq_mod *mmod; + struct ip_masq *ms = NULL; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_out_create) continue; + if ((ms=mmod->mmod_out_create(skb, iph, maddr))) { + goto out; + } + } +out: + return ms; +} + +int ip_masq_mod_in_update(const struct sk_buff *skb, const struct iphdr *iph, struct ip_masq *ms) +{ + struct ip_masq_mod *mmod; + int ret = IP_MASQ_MOD_NOP; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_in_update) continue; + switch (ret=mmod->mmod_in_update(skb, iph, ms)) { + case IP_MASQ_MOD_NOP: + continue; + case IP_MASQ_MOD_ACCEPT: + case IP_MASQ_MOD_REJECT: + goto out; + } + } +out: + return ret; +} + +int ip_masq_mod_out_update(const struct sk_buff *skb, const struct iphdr *iph, struct ip_masq *ms) +{ + struct ip_masq_mod *mmod; + int ret = IP_MASQ_MOD_NOP; + + for (mmod=ip_masq_mod_lkp_base;mmod;mmod=mmod->next) { + if (!mmod->mmod_out_update) continue; + switch (ret=mmod->mmod_out_update(skb, iph, ms)) { + case IP_MASQ_MOD_NOP: + continue; + case IP_MASQ_MOD_ACCEPT: + case IP_MASQ_MOD_REJECT: + goto out; + } + } +out: + return ret; +} + +struct ip_masq_mod * ip_masq_mod_getbyname(const char *mmod_name) +{ + struct ip_masq_mod * mmod; + + IP_MASQ_DEBUG(1, "searching mmod_name \"%s\"\n", mmod_name); + + for (mmod=ip_masq_mod_reg_base; mmod ; mmod=mmod->next_reg) { + if (mmod->mmod_ctl && *(mmod_name) + && (strcmp(mmod_name, mmod->mmod_name)==0)) { + /* HIT */ + return mmod; + } + } + return NULL; +} + +/* + * Module control entry + */ +int ip_masq_mod_ctl(int optname, struct ip_masq_ctl *mctl, int optlen) +{ + struct ip_masq_mod * mmod; +#ifdef CONFIG_KMOD + char 
kmod_name[IP_MASQ_TNAME_MAX+8]; +#endif + /* tappo */ + mctl->m_tname[IP_MASQ_TNAME_MAX-1] = 0; + + mmod = ip_masq_mod_getbyname(mctl->m_tname); + if (mmod) + return mmod->mmod_ctl(optname, mctl, optlen); +#ifdef CONFIG_KMOD + sprintf(kmod_name,"ip_masq_%s", mctl->m_tname); + + IP_MASQ_DEBUG(1, "About to request \"%s\" module\n", kmod_name); + + /* + * Let sleep for a while ... + */ + request_module(kmod_name); + mmod = ip_masq_mod_getbyname(mctl->m_tname); + if (mmod) + return mmod->mmod_ctl(optname, mctl, optlen); +#endif + return ESRCH; +} diff --git a/pfinet/linux-src/net/ipv4/ip_masq_portfw.c b/pfinet/linux-src/net/ipv4/ip_masq_portfw.c new file mode 100644 index 00000000..6c697a10 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_portfw.c @@ -0,0 +1,508 @@ +/* + * IP_MASQ_PORTFW masquerading module + * + * + * $Id: ip_masq_portfw.c,v 1.3.2.1 1999/07/02 10:10:02 davem Exp $ + * + * Author: Steven Clarke <steven.clarke@monmouth.demon.co.uk> + * + * Fixes: + * Juan Jose Ciarlante : created this new file from ip_masq.c and ip_fw.c + * Juan Jose Ciarlante : modularized + * Juan Jose Ciarlante : use GFP_KERNEL + * Juan Jose Ciarlante : locking + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <net/ip.h> +#include <linux/ip_fw.h> +#include <linux/ip_masq.h> +#include <net/ip_masq.h> +#include <net/ip_masq_mod.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +#define IP_PORTFW_PORT_MIN 1 +#define IP_PORTFW_PORT_MAX 60999 + +struct ip_portfw { + struct list_head list; + __u32 laddr, raddr; + __u16 lport, rport; + atomic_t pref_cnt; /* pref "counter" down to 0 */ + int pref; /* user set pref */ +}; + +static struct ip_masq_mod *mmod_self = NULL; +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +/* + * Lock + */ +static rwlock_t portfw_lock = RW_LOCK_UNLOCKED; + +static struct list_head portfw_list[2]; +static __inline__ int portfw_idx(int protocol) +{ + return (protocol==IPPROTO_TCP); +} + +/* + * + * Delete forwarding entry(s): + * called from _DEL, u-space. + * . "relaxed" match, except for lport + * + */ + +static __inline__ int ip_portfw_del(__u16 protocol, __u16 lport, __u32 laddr, __u16 rport, __u32 raddr) +{ + int prot = portfw_idx(protocol); + struct ip_portfw *n; + struct list_head *entry; + struct list_head *list = &portfw_list[prot]; + int nent; + + nent = atomic_read(&mmod_self->mmod_nent); + + write_lock_bh(&portfw_lock); + + for (entry=list->next;entry != list;entry = entry->next) { + n = list_entry(entry, struct ip_portfw, list); + if (n->lport == lport && + (!laddr || n->laddr == laddr) && + (!raddr || n->raddr == raddr) && + (!rport || n->rport == rport)) { + list_del(entry); + ip_masq_mod_dec_nent(mmod_self); + kfree_s(n, sizeof(struct ip_portfw)); + MOD_DEC_USE_COUNT; + } + } + write_unlock_bh(&portfw_lock); + + return nent==atomic_read(&mmod_self->mmod_nent)? ESRCH : 0; +} + +/* + * Flush tables + * called from _FLUSH, u-space. 
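+ * Unlinks and frees every entry in both the UDP and TCP lists,
+ * dropping one module reference per entry.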
+ */ +static __inline__ void ip_portfw_flush(void) +{ + int prot; + struct list_head *l; + struct list_head *e; + struct ip_portfw *n; + + write_lock_bh(&portfw_lock); + + for (prot = 0; prot < 2;prot++) { + l = &portfw_list[prot]; + while((e=l->next) != l) { + ip_masq_mod_dec_nent(mmod_self); + n = list_entry (e, struct ip_portfw, list); + list_del(e); + kfree_s(n, sizeof (*n)); + MOD_DEC_USE_COUNT; + } + } + + write_unlock_bh(&portfw_lock); +} + +/* + * Lookup routine for lport,laddr match + * must be called with locked tables + */ +static __inline__ struct ip_portfw *ip_portfw_lookup(__u16 protocol, __u16 lport, __u32 laddr, __u32 *daddr_p, __u16 *dport_p) +{ + int prot = portfw_idx(protocol); + + struct ip_portfw *n = NULL; + struct list_head *l, *e; + + l = &portfw_list[prot]; + + for (e=l->next;e!=l;e=e->next) { + n = list_entry(e, struct ip_portfw, list); + if (lport == n->lport && laddr == n->laddr) { + /* Please be nice, don't pass only a NULL dport */ + if (daddr_p) { + *daddr_p = n->raddr; + *dport_p = n->rport; + } + + goto out; + } + } + n = NULL; +out: + return n; +} + +/* + * Edit routine for lport,[laddr], [raddr], [rport] match + * By now, only called from u-space + */ +static __inline__ int ip_portfw_edit(__u16 protocol, __u16 lport, __u32 laddr, __u16 rport, __u32 raddr, int pref) +{ + int prot = portfw_idx(protocol); + + struct ip_portfw *n = NULL; + struct list_head *l, *e; + int count = 0; + + + read_lock_bh(&portfw_lock); + + l = &portfw_list[prot]; + + for (e=l->next;e!=l;e=e->next) { + n = list_entry(e, struct ip_portfw, list); + if (lport == n->lport && + (!laddr || laddr == n->laddr) && + (!rport || rport == n->rport) && + (!raddr || raddr == n->raddr)) { + n->pref = pref; + atomic_set(&n->pref_cnt, pref); + count++; + } + } + + read_unlock_bh(&portfw_lock); + + return count; +} + +/* + * Add/edit en entry + * called from _ADD, u-space. + * must return 0 or +errno + */ +static __inline__ int ip_portfw_add(__u16 protocol, __u16 lport, __u32 laddr, __u16 rport, __u32 raddr, int pref) +{ + struct ip_portfw *npf; + int prot = portfw_idx(protocol); + + if (pref <= 0) + return EINVAL; + + if (ip_portfw_edit(protocol, lport, laddr, rport, raddr, pref)) { + /* + * Edit ok ... + */ + return 0; + } + + /* may block ... */ + npf = (struct ip_portfw*) kmalloc(sizeof(struct ip_portfw), GFP_KERNEL); + + if (!npf) + return ENOMEM; + + MOD_INC_USE_COUNT; + memset(npf, 0, sizeof(*npf)); + + npf->laddr = laddr; + npf->lport = lport; + npf->rport = rport; + npf->raddr = raddr; + npf->pref = pref; + + atomic_set(&npf->pref_cnt, npf->pref); + INIT_LIST_HEAD(&npf->list); + + write_lock_bh(&portfw_lock); + + /* + * Add at head + */ + list_add(&npf->list, &portfw_list[prot]); + + write_unlock_bh(&portfw_lock); + + ip_masq_mod_inc_nent(mmod_self); + return 0; +} + + + +static __inline__ int portfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen) +{ + struct ip_portfw_user *mm = &mctl->u.portfw_user; + int ret = EINVAL; + int arglen = optlen - IP_MASQ_CTL_BSIZE; + int cmd; + + + IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n", + arglen, + sizeof (*mm), + optlen, + sizeof (*mctl)); + + /* + * Yes, I'm a bad guy ... + */ + if (arglen != sizeof(*mm) && optlen != sizeof(*mctl)) + return EINVAL; + + /* + * Don't trust the lusers - plenty of error checking! 
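+ *
+ * For ADD/DEL the local port must lie within
+ * IP_PORTFW_PORT_MIN..IP_PORTFW_PORT_MAX and the protocol must be
+ * TCP or UDP; FLUSH skips these checks.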
+ */ + cmd = mctl->m_cmd; + IP_MASQ_DEBUG(1-debug, "ip_masq_portfw_ctl(cmd=%d)\n", cmd); + + + switch (cmd) { + case IP_MASQ_CMD_NONE: + return 0; + case IP_MASQ_CMD_FLUSH: + break; + default: + if (htons(mm->lport) < IP_PORTFW_PORT_MIN || htons(mm->lport) > IP_PORTFW_PORT_MAX) + return EINVAL; + + if (mm->protocol!=IPPROTO_TCP && mm->protocol!=IPPROTO_UDP) + return EINVAL; + } + + switch(cmd) { + case IP_MASQ_CMD_ADD: + ret = ip_portfw_add(mm->protocol, + mm->lport, mm->laddr, + mm->rport, mm->raddr, + mm->pref); + break; + + case IP_MASQ_CMD_DEL: + ret = ip_portfw_del(mm->protocol, + mm->lport, mm->laddr, + mm->rport, mm->raddr); + break; + case IP_MASQ_CMD_FLUSH: + ip_portfw_flush(); + ret = 0; + break; + } + + + return ret; +} + + + + +#ifdef CONFIG_PROC_FS + +static int portfw_procinfo(char *buffer, char **start, off_t offset, + int length, int unused) +{ + off_t pos=0, begin; + struct ip_portfw *pf; + struct list_head *l, *e; + char temp[65]; + int ind; + int len=0; + + + if (offset < 64) + { + sprintf(temp, "Prot LAddr LPort > RAddr RPort PrCnt Pref"); + len = sprintf(buffer, "%-63s\n", temp); + } + pos = 64; + + read_lock_bh(&portfw_lock); + + for(ind = 0; ind < 2; ind++) + { + l = &portfw_list[ind]; + for (e=l->next; e!=l; e=e->next) + { + pf = list_entry(e, struct ip_portfw, list); + pos += 64; + if (pos <= offset) { + len = 0; + continue; + } + + sprintf(temp,"%s %08lX %5u > %08lX %5u %5d %5d", + ind ? "TCP" : "UDP", + ntohl(pf->laddr), ntohs(pf->lport), + ntohl(pf->raddr), ntohs(pf->rport), + atomic_read(&pf->pref_cnt), pf->pref); + len += sprintf(buffer+len, "%-63s\n", temp); + + if (len >= length) + goto done; + } + } +done: + read_unlock_bh(&portfw_lock); + + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + if(len>length) + len = length; + return len; +} + +static struct proc_dir_entry portfw_proc_entry = { +/* 0, 0, NULL", */ + 0, 6, "portfw", /* Just for compatibility, for now ... */ + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + portfw_procinfo +}; + +#define proc_ent &portfw_proc_entry +#else /* !CONFIG_PROC_FS */ + +#define proc_ent NULL +#endif + +static int portfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph) +{ + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); +#ifdef CONFIG_IP_MASQ_DEBUG + struct rtable *rt = (struct rtable *)skb->dst; +#endif + struct ip_portfw *pfw; + + IP_MASQ_DEBUG(2, "portfw_in_rule(): skb:= dev=%s (index=%d), rt_iif=%d, rt_flags=0x%x rt_dev___=%s daddr=%d.%d.%d.%d dport=%d\n", + skb->dev->name, skb->dev->ifindex, rt->rt_iif, rt->rt_flags, + rt->u.dst.dev->name, + NIPQUAD(iph->daddr), ntohs(portp[1])); + + read_lock(&portfw_lock); + pfw = ip_portfw_lookup(iph->protocol, portp[1], iph->daddr, NULL, NULL); + read_unlock(&portfw_lock); + return (pfw!=0); +} + +static struct ip_masq * portfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr) +{ + /* + * If no entry exists in the masquerading table + * and the port is involved + * in port forwarding, create a new masq entry + */ + + __u32 raddr; + __u16 rport; + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); + struct ip_masq *ms = NULL; + struct ip_portfw *pf; + + /* + * Lock for writing. 
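+ * A write lock is needed (rather than a read lock) because the
+ * round-robin step below may re-link the matched entry at the
+ * tail of its list.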
+ */ + write_lock(&portfw_lock); + + if ((pf=ip_portfw_lookup(iph->protocol, + portp[1], iph->daddr, + &raddr, &rport))) { + ms = ip_masq_new(iph->protocol, + iph->daddr, portp[1], + raddr, rport, + iph->saddr, portp[0], + 0); + ip_masq_listen(ms); + + if (!ms || atomic_read(&mmod_self->mmod_nent) <= 1 + /* || ip_masq_nlocks(&portfw_lock) != 1 */ ) + /* + * Maybe later... + */ + goto out; + + /* + * Entry created, lock==1. + * if pref_cnt == 0, move + * entry at _tail_. + * This is a simple load balance scheduling + */ + + if (atomic_dec_and_test(&pf->pref_cnt)) { + + atomic_set(&pf->pref_cnt, pf->pref); + list_del(&pf->list); + list_add(&pf->list, + portfw_list[portfw_idx(iph->protocol)].prev); + + } + } +out: + write_unlock(&portfw_lock); + return ms; +} + +#define portfw_in_update NULL +#define portfw_out_rule NULL +#define portfw_out_create NULL +#define portfw_out_update NULL + +static struct ip_masq_mod portfw_mod = { + NULL, /* next */ + NULL, /* next_reg */ + "portfw", /* name */ + ATOMIC_INIT(0), /* nent */ + ATOMIC_INIT(0), /* refcnt */ + proc_ent, + portfw_ctl, + NULL, /* masq_mod_init */ + NULL, /* masq_mod_done */ + portfw_in_rule, + portfw_in_update, + portfw_in_create, + portfw_out_rule, + portfw_out_update, + portfw_out_create, +}; + + + +__initfunc(int ip_portfw_init(void)) +{ + INIT_LIST_HEAD(&portfw_list[0]); + INIT_LIST_HEAD(&portfw_list[1]); + return register_ip_masq_mod ((mmod_self=&portfw_mod)); +} + +int ip_portfw_done(void) +{ + return unregister_ip_masq_mod(&portfw_mod); +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_portfw_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_portfw_done() != 0) + printk(KERN_INFO "ip_portfw_done(): can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_quake.c b/pfinet/linux-src/net/ipv4/ip_masq_quake.c new file mode 100644 index 00000000..995c3a0a --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_quake.c @@ -0,0 +1,322 @@ +/* + * IP_MASQ_QUAKE quake masquerading module + * + * + * Version: @(#)ip_masq_quake.c 0.02 22/02/97 + * + * Author: Harald Hoyer mailto:HarryH@Royal.Net + * + * + * Fixes: + * Harald Hoyer : Unofficial Quake Specs found at + * http://www.gamers.org/dEngine/quake/spec/ + * Harald Hoyer : Check for QUAKE-STRING + * Juan Jose Ciarlante : litl bits for 2.1 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + */ + +#include <linux/module.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/ip_masq.h> + +#define DEBUG_CONFIG_IP_MASQ_QUAKE 0 + +typedef struct +{ + __u16 type; // (Little Endian) Type of message. + __u16 length; // (Little Endian) Length of message, header included. + char message[0]; // The contents of the message. 
+} QUAKEHEADER; + +struct quake_priv_data { + /* Have we seen a client connect message */ + signed char cl_connect; +}; + +static int +masq_quake_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + if ((ms->app_data = kmalloc(sizeof(struct quake_priv_data), + GFP_ATOMIC)) == NULL) + printk(KERN_INFO "Quake: No memory for application data\n"); + else + { + struct quake_priv_data *priv = + (struct quake_priv_data *)ms->app_data; + priv->cl_connect = 0; + } + return 0; +} + +static int +masq_quake_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + if (ms->app_data) + kfree_s(ms->app_data, sizeof(struct quake_priv_data)); + return 0; +} + +int +masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct udphdr *uh; + QUAKEHEADER *qh; + __u16 udp_port; + char *data; + unsigned char code; + struct quake_priv_data *priv = (struct quake_priv_data *)ms->app_data; + + if(priv->cl_connect == -1) + return 0; + + skb = *skb_p; + + iph = skb->nh.iph; + uh = (struct udphdr *)&(((char *)iph)[iph->ihl*4]); + + /* Check for lenght */ + if(ntohs(uh->len) < 5) + return 0; + + qh = (QUAKEHEADER *)&uh[1]; + + if(qh->type != 0x0080) + return 0; + + + code = qh->message[0]; + +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_in: code = %d \n", (int)code); +#endif + + switch(code) { + case 0x01: + /* Connection Request */ + + if(ntohs(qh->length) < 0x0c) { +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_in: length < 0xc \n"); +#endif + return 0; + } + + data = &qh->message[1]; + + /* Check for stomping string */ + if(memcmp(data,"QUAKE\0\3",7)) { +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: memcmp failed \n"); +#endif + return 0; + } + else { + priv->cl_connect = 1; +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: memcmp ok \n"); +#endif + } + break; + + case 0x81: + /* Accept Connection */ + if((ntohs(qh->length) < 0x09) || (priv->cl_connect == 0)) + return 0; + data = &qh->message[1]; + + memcpy(&udp_port, data, 2); + + ms->dport = htons(udp_port); + +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_in: in_rewrote UDP port %d \n", udp_port); +#endif + priv->cl_connect = -1; + + break; + } + + return 0; +} + +int +masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct udphdr *uh; + QUAKEHEADER *qh; + __u16 udp_port; + char *data; + unsigned char code; + struct ip_masq *n_ms; + struct quake_priv_data *priv = (struct quake_priv_data *)ms->app_data; + + if(priv->cl_connect == -1) + return 0; + + skb = *skb_p; + + iph = skb->nh.iph; + uh = (struct udphdr *)&(((char *)iph)[iph->ihl*4]); + + /* Check for lenght */ + if(ntohs(uh->len) < 5) + return 0; + + qh = (QUAKEHEADER *)&uh[1]; + +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: qh->type = %d \n", (int)qh->type); +#endif + + if(qh->type != 0x0080) + return 0; + + code = qh->message[0]; + +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: code = %d \n", (int)code); +#endif + + switch(code) { + case 0x01: + /* Connection Request */ + + if(ntohs(qh->length) < 0x0c) { +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: length < 0xc \n"); +#endif + return 0; + } + + data = &qh->message[1]; + + /* Check for stomping string */ + if(memcmp(data,"QUAKE\0\3",7)) { +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: memcmp failed \n"); +#endif + return 0; + } + else { + priv->cl_connect = 1; +#if 
DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: memcmp ok \n"); +#endif + } + break; + + case 0x81: + /* Accept Connection */ + if((ntohs(qh->length) < 0x09) || (priv->cl_connect == 0)) + return 0; + + data = &qh->message[1]; + + memcpy(&udp_port, data, 2); + + n_ms = ip_masq_new(IPPROTO_UDP, + maddr, 0, + ms->saddr, htons(udp_port), + ms->daddr, ms->dport, + 0); + + if (n_ms==NULL) + return 0; + +#if DEBUG_CONFIG_IP_MASQ_QUAKE + printk("Quake_out: out_rewrote UDP port %d -> %d\n", + udp_port, ntohs(n_ms->mport)); +#endif + udp_port = ntohs(n_ms->mport); + memcpy(data, &udp_port, 2); + + ip_masq_listen(n_ms); + ip_masq_control_add(n_ms, ms); + ip_masq_put(n_ms); + + break; + } + + return 0; +} + +struct ip_masq_app ip_masq_quake = { + NULL, /* next */ + "Quake_26", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_quake_init_1, /* ip_masq_init_1 */ + masq_quake_done_1, /* ip_masq_done_1 */ + masq_quake_out, /* pkt_out */ + masq_quake_in /* pkt_in */ +}; +struct ip_masq_app ip_masq_quakenew = { + NULL, /* next */ + "Quake_27", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_quake_init_1, /* ip_masq_init_1 */ + masq_quake_done_1, /* ip_masq_done_1 */ + masq_quake_out, /* pkt_out */ + masq_quake_in /* pkt_in */ +}; + +/* + * ip_masq_quake initialization + */ + +__initfunc(int ip_masq_quake_init(void)) +{ + return (register_ip_masq_app(&ip_masq_quake, IPPROTO_UDP, 26000) + + register_ip_masq_app(&ip_masq_quakenew, IPPROTO_UDP, 27000)); +} + +/* + * ip_masq_quake fin. + */ + +int ip_masq_quake_done(void) +{ + return (unregister_ip_masq_app(&ip_masq_quake) + + unregister_ip_masq_app(&ip_masq_quakenew)); +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_quake_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_quake_done() != 0) + printk("ip_masq_quake: can't remove module"); +} + +#endif /* MODULE */ + + diff --git a/pfinet/linux-src/net/ipv4/ip_masq_raudio.c b/pfinet/linux-src/net/ipv4/ip_masq_raudio.c new file mode 100644 index 00000000..ee3e276b --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_raudio.c @@ -0,0 +1,578 @@ +/* + * IP_MASQ_RAUDIO - Real Audio masquerading module + * + * + * Version: @(#)$Id: ip_masq_raudio.c,v 1.11 1998/10/06 04:49:04 davem Exp $ + * + * Author: Nigel Metheringham + * Real Time Streaming code by Progressive Networks + * [strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne] + * [Real Audio information taken from Progressive Networks firewall docs] + * [Kudos to Progressive Networks for making the protocol specs available] + * + * + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Limitations + * The IP Masquerading proxies at present do not have access to a processed + * data stream. Hence for a protocol like the Real Audio control protocol, + * which depends on knowing where you are in the data stream, you either + * to keep a *lot* of state in your proxy, or you cheat and simplify the + * problem [needless to say I did the latter]. + * + * This proxy only handles data in the first packet. Everything else is + * passed transparently. This means it should work under all normal + * circumstances, but it could be fooled by new data formats or a + * malicious application! 
+ * + * At present the "first packet" is defined as a packet starting with + * the protocol ID string - "PNA". + * When the link is up there appears to be enough control data + * crossing the control link to keep it open even if a long audio + * piece is playing. + * + * The Robust UDP support added in RealAudio 3.0 is supported, but due + * to servers/clients not making great use of this has not been greatly + * tested. RealVideo (as used in the Real client version 4.0beta1) is + * supported but again is not greatly tested (bandwidth requirements + * appear to exceed that available at the sites supporting the protocol). + * + * Multiple Port Support + * The helper can be made to handle up to MAX_MASQ_APP_PORTS (normally 12) + * with the port numbers being defined at module load time. The module + * uses the symbol "ports" to define a list of monitored ports, which can + * be specified on the insmod command line as + * ports=x1,x2,x3... + * where x[n] are integer port numbers. This option can be put into + * /etc/conf.modules (or /etc/modules.conf depending on your config) + * where modload will pick it up should you use modload to load your + * modules. + * + * Fixes: + * Juan Jose Ciarlante : Use control_add() for control chan + * 10/15/97 - Modifications to allow masquerading of RTSP connections as + * well as PNA, which can potentially exist on the same port. + * Joe Rumsey <ogre@real.com> + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/ip_masq.h> + +/* +#ifndef DEBUG_CONFIG_IP_MASQ_RAUDIO +#define DEBUG_CONFIG_IP_MASQ_RAUDIO 0 +#endif +*/ + +#define TOLOWER(c) (((c) >= 'A' && (c) <= 'Z') ? ((c) - 'A' + 'a') : (c)) +#define ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) + +struct raudio_priv_data { + /* Associated data connection - setup but not used at present */ + struct ip_masq *data_conn; + /* UDP Error correction connection - setup but not used at present */ + struct ip_masq *error_conn; + /* Have we seen and performed setup */ + short seen_start; + short is_rtsp; +}; + +int +masq_rtsp_out (struct ip_masq_app *mapp, + struct ip_masq *ms, + struct sk_buff **skb_p, + __u32 maddr); + +/* + * List of ports (up to MAX_MASQ_APP_PORTS) to be handled by helper + * First port is set to the default port. 
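+ *
+ * Illustrative example (the third port is arbitrary):
+ * insmod ip_masq_raudio ports=554,7070,8554
+ * registers one helper incarnation per listed TCP control port.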
+ */ +int ports[MAX_MASQ_APP_PORTS] = {554, 7070, 0}; /* I rely on the trailing items being set to zero */ +struct ip_masq_app *masq_incarnations[MAX_MASQ_APP_PORTS]; + +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); + + +static int +masq_raudio_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + if ((ms->app_data = kmalloc(sizeof(struct raudio_priv_data), + GFP_ATOMIC)) == NULL) + printk(KERN_INFO "RealAudio: No memory for application data\n"); + else + { + struct raudio_priv_data *priv = + (struct raudio_priv_data *)ms->app_data; + priv->seen_start = 0; + priv->data_conn = NULL; + priv->error_conn = NULL; + priv->is_rtsp = 0; + } + return 0; +} + +static int +masq_raudio_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + if (ms->app_data) + kfree_s(ms->app_data, sizeof(struct raudio_priv_data)); + return 0; +} + +int +masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *p, *data, *data_limit; + struct ip_masq *n_ms; + unsigned short version, msg_id, msg_len, udp_port; + struct raudio_priv_data *priv = + (struct raudio_priv_data *)ms->app_data; + + /* Everything running correctly already */ + if (priv && priv->seen_start) + return 0; + + if(priv && priv->is_rtsp) + return masq_rtsp_out(mapp, ms, skb_p, maddr); + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + + data_limit = skb->h.raw + skb->len; + + if(memcmp(data, "OPTIONS", 7) == 0 || + memcmp(data, "DESCRIBE", 8) == 0) + { + IP_MASQ_DEBUG(1-debug, "RealAudio: Detected RTSP connection\n"); + /* This is an RTSP client */ + if(priv) + priv->is_rtsp = 1; + return masq_rtsp_out(mapp, ms, skb_p, maddr); + } + + /* Check to see if this is the first packet with protocol ID */ + if (memcmp(data, "PNA", 3)) { + IP_MASQ_DEBUG(1-debug, "RealAudio: not initial protocol packet - ignored\n"); + return(0); + } + data += 3; + memcpy(&version, data, 2); + + IP_MASQ_DEBUG(1-debug, "RealAudio: initial seen - protocol version %d\n", + ntohs(version)); + if (priv) + priv->seen_start = 1; + + if (ntohs(version) >= 256) + { + printk(KERN_INFO "RealAudio: version (%d) not supported\n", + ntohs(version)); + return 0; + } + + data += 2; + while (data+4 < data_limit) { + memcpy(&msg_id, data, 2); + data += 2; + memcpy(&msg_len, data, 2); + data += 2; + if (ntohs(msg_id) == 0) { + /* The zero tag indicates the end of options */ + IP_MASQ_DEBUG(1-debug, "RealAudio: packet end tag seen\n"); + return 0; + } + IP_MASQ_DEBUG(1-debug, "RealAudio: msg %d - %d byte\n", + ntohs(msg_id), ntohs(msg_len)); + if (ntohs(msg_id) == 0) { + /* The zero tag indicates the end of options */ + return 0; + } + p = data; + data += ntohs(msg_len); + if (data > data_limit) + { + printk(KERN_INFO "RealAudio: Packet too short for data\n"); + return 0; + } + if ((ntohs(msg_id) == 1) || (ntohs(msg_id) == 7)) { + /* + * MsgId == 1 + * Audio UDP data port on client + * + * MsgId == 7 + * Robust UDP error correction port number on client + * + * Since these messages are treated just the same, they + * are bundled together here.... + */ + memcpy(&udp_port, p, 2); + + /* + * Sometimes a server sends a message 7 with a zero UDP port + * Rather than do anything with this, just ignore it! 
+ */ + if (udp_port == 0) + continue; + + + n_ms = ip_masq_new(IPPROTO_UDP, + maddr, 0, + ms->saddr, udp_port, + ms->daddr, 0, + IP_MASQ_F_NO_DPORT); + + if (n_ms==NULL) + return 0; + + ip_masq_listen(n_ms); + ip_masq_control_add(n_ms, ms); + + memcpy(p, &(n_ms->mport), 2); + IP_MASQ_DEBUG(1-debug, "RealAudio: rewrote UDP port %d -> %d in msg %d\n", + ntohs(udp_port), ntohs(n_ms->mport), ntohs(msg_id)); + + /* Make ref in application data to data connection */ + if (priv) { + if (ntohs(msg_id) == 1) + priv->data_conn = n_ms; + else + priv->error_conn = n_ms; + } + + ip_masq_put(n_ms); + } + } + return 0; +} + +/* + * masq_rtsp_out + * + * + */ +int +masq_rtsp_out (struct ip_masq_app *mapp, + struct ip_masq *ms, + struct sk_buff **skb_p, + __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *data, *data_limit; + struct ip_masq *n_ms, *n_ms2; + unsigned short udp_port; + struct raudio_priv_data *priv = + (struct raudio_priv_data *)ms->app_data; + const char* srch = "transport:"; + const char* srchpos = srch; + const char* srchend = srch + strlen(srch); + int state = 0; + char firstport[6]; + int firstportpos = 0; + char secondport[6]; + int secondportpos = 0; + char *portstart = NULL, *portend = NULL; + int diff; + + /* Everything running correctly already */ + if (priv && priv->seen_start) + return 0; + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + + data_limit = skb->h.raw + skb->len; + + firstport[0] = 0; + secondport[0] = 0; + + while(data < data_limit && state >= 0) + { + switch(state) + { + case 0: + case 1: + if(TOLOWER(*data) == *srchpos) + { + srchpos++; + if(srchpos == srchend) + { + IP_MASQ_DEBUG(1-debug, "Found string %s in message\n", + srch); + state++; + if(state == 1) + { + srch = "client_port"; + srchpos = srch; + srchend = srch + strlen(srch); + } + } + } + else + { + srchpos = srch; + } + break; + case 2: + if(*data == '=') + state = 3; + break; + case 3: + if(ISDIGIT(*data)) + { + portstart = data; + firstportpos = 0; + firstport[firstportpos++] = *data; + state = 4; + } + break; + case 4: + if(*data == '-') + { + state = 5; + } + else if(*data == ';') + { + portend = data - 1; + firstport[firstportpos] = 0; + state = -1; + } + else if(ISDIGIT(*data)) + { + firstport[firstportpos++] = *data; + } + else if(*data != ' ' && *data != '\t') + { + /* This is a badly formed RTSP message, let's bail out */ + IP_MASQ_DEBUG(1-debug, "Badly formed RTSP Message\n"); + return 0; + } + break; + case 5: + if(ISDIGIT(*data)) + { + secondportpos = 0; + secondport[secondportpos++] = *data; + state = 6; + } + else if(*data == ';') + { + portend = data - 1; + secondport[secondportpos] = 0; + state = -1; + } + break; + case 6: + if(*data == ';') + { + portend = data - 1; + secondport[secondportpos] = 0; + state = -1; + } + else if(ISDIGIT(*data)) + { + secondport[secondportpos++] = *data; + } + else if(*data != ' ' && *data != '\t') + { + /* This is a badly formed RTSP message, let's bail out */ + IP_MASQ_DEBUG(1-debug, "Badly formed RTSP Message\n"); + return 0; + } + break; + } + data++; + } + + if(state >= 0) + return 0; + + if(firstportpos > 0) + { + char newbuf[12]; /* xxxxx-xxxxx\0 */ + char* tmpptr; + + udp_port = htons(simple_strtoul(firstport, &tmpptr, 10)); + n_ms = ip_masq_new(IPPROTO_UDP, + maddr, 0, + ms->saddr, udp_port, + ms->daddr, 0, + IP_MASQ_F_NO_DPORT); + if (n_ms==NULL) + return 0; + + ip_masq_listen(n_ms); + ip_masq_control_add(n_ms, ms); + + if(secondportpos > 0) 
+ { + udp_port = htons(simple_strtoul(secondport, &tmpptr, 10)); + n_ms2 = ip_masq_new(IPPROTO_UDP, + maddr, 0, + ms->saddr, udp_port, + ms->daddr, 0, + IP_MASQ_F_NO_DPORT); + if (n_ms2==NULL) { + ip_masq_put(n_ms); + return 0; + } + + ip_masq_listen(n_ms2); + ip_masq_control_add(n_ms2, ms); + sprintf(newbuf, "%d-%d", ntohs(n_ms->mport), + ntohs(n_ms2->mport)); + } + else + { + sprintf(newbuf, "%d", ntohs(n_ms->mport)); + n_ms2 = NULL; + } + *skb_p = ip_masq_skb_replace(skb, GFP_ATOMIC, + portstart, portend - portstart + 1, + newbuf, strlen(newbuf)); + IP_MASQ_DEBUG(1-debug, "RTSP: rewrote client_port to %s\n", newbuf); + diff = strlen(newbuf) - (portend - portstart); + } + else + { + return 0; + } + + if(priv) + { + priv->seen_start = 1; + if(n_ms) + priv->data_conn = n_ms; + if(n_ms2) + priv->error_conn = n_ms2; + } + /* + * Release tunnels + */ + + if (n_ms) + ip_masq_put(n_ms); + + if (n_ms2) + ip_masq_put(n_ms2); + + return diff; +} + +struct ip_masq_app ip_masq_raudio = { + NULL, /* next */ + "RealAudio", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_raudio_init_1, /* ip_masq_init_1 */ + masq_raudio_done_1, /* ip_masq_done_1 */ + masq_raudio_out, /* pkt_out */ + NULL /* pkt_in */ +}; + +/* + * ip_masq_raudio initialization + */ + +__initfunc(int ip_masq_raudio_init(void)) +{ + int i, j; + + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (ports[i]) { + if ((masq_incarnations[i] = kmalloc(sizeof(struct ip_masq_app), + GFP_KERNEL)) == NULL) + return -ENOMEM; + memcpy(masq_incarnations[i], &ip_masq_raudio, sizeof(struct ip_masq_app)); + if ((j = register_ip_masq_app(masq_incarnations[i], + IPPROTO_TCP, + ports[i]))) { + return j; + } + IP_MASQ_DEBUG(1-debug, "RealAudio: loaded support on port[%d] = %d\n", + i, ports[i]); + } else { + /* To be safe, force the incarnation table entry to NULL */ + masq_incarnations[i] = NULL; + } + } + return 0; +} + +/* + * ip_masq_raudio fin. 
+ */ + +int ip_masq_raudio_done(void) +{ + int i, j, k; + + k=0; + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (masq_incarnations[i]) { + if ((j = unregister_ip_masq_app(masq_incarnations[i]))) { + k = j; + } else { + kfree(masq_incarnations[i]); + masq_incarnations[i] = NULL; + IP_MASQ_DEBUG(1-debug, "RealAudio: unloaded support on port[%d] = %d\n", + i, ports[i]); + } + } + } + return k; +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_raudio_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_raudio_done() != 0) + printk(KERN_INFO "ip_masq_raudio: can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_user.c b/pfinet/linux-src/net/ipv4/ip_masq_user.c new file mode 100644 index 00000000..51297441 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_user.c @@ -0,0 +1,473 @@ +/* + * IP_MASQ_USER user space control module + * + * + * $Id: ip_masq_user.c,v 1.1.2.1 1999/08/07 10:56:33 davem Exp $ + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <asm/system.h> +#include <linux/stat.h> +#include <linux/proc_fs.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/inet.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/checksum.h> +#include <net/ip_masq.h> +#include <net/ip_masq_mod.h> +#include <linux/sysctl.h> +#include <linux/ip_fw.h> + +#include <linux/ip_masq.h> + +/* + * Debug level + */ +static int debug=0; + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); +MODULE_PARM(debug, "i"); + +/* +static int check_5uple (struct ip_masq_user *ums) { + return 0; +} +*/ +static void masq_user_k2u(const struct ip_masq *ms, struct ip_masq_user *ums) +{ + ums->protocol = ms->protocol; + ums->daddr = ms->daddr; + ums->dport = ms->dport; + ums->maddr = ms->maddr; + ums->mport = ms->mport; + ums->saddr = ms->saddr; + ums->sport = ms->sport; + ums->timeout = ms->timeout; +} + + +static int ip_masq_user_maddr(struct ip_masq_user *ums) +{ + struct device *dev; + struct rtable *rt; + int ret = -EINVAL; + u32 rt_daddr, rt_saddr; + u32 tos; + + /* + * Did specify masq address. + */ + if (ums->maddr) + return 0; + + /* + * Select address to use for routing query + */ + + rt_daddr = ums->rt_daddr? ums->rt_daddr : ums->daddr; + rt_saddr = ums->rt_saddr? 
ums->rt_saddr : ums->saddr; + + + /* + * No address for routing, cannot continue + */ + if (rt_daddr == 0) { + IP_MASQ_DEBUG(1-debug, "cannot setup maddr with daddr=%lX, rt_addr=%lX\n", + ntohl(ums->daddr), ntohl(ums->rt_daddr)); + return -EINVAL; + } + + /* + * Find out rt device + */ + + rt_saddr = 0; + tos = RT_TOS(ums->ip_tos) | RTO_CONN; + + if ((ret=ip_route_output(&rt, rt_daddr, rt_saddr, tos, 0 /* dev */))) { + IP_MASQ_DEBUG(0-debug, "could not setup maddr for routing daddr=%lX, saddr=%lX\n", + ntohl(rt_daddr), ntohl(rt_saddr)); + return ret; + } + dev = rt->u.dst.dev; + ums->maddr = ip_masq_select_addr(dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); + + IP_MASQ_DEBUG(1-debug, "did setup maddr=%lX\n", ntohl(ums->maddr)); + ip_rt_put(rt); + return 0; +} + +/* + * Create new entry (from uspace) + */ +static int ip_masq_user_new(struct ip_masq_user *ums) +{ + struct ip_masq *ms = NULL; + unsigned mflags = 0; + int ret; + + if (masq_proto_num (ums->protocol) == -1) { + return EPROTONOSUPPORT; + } + + if (ums->dport == 0) { + ums->flags |= IP_MASQ_USER_F_LISTEN; + } + + if (ums->flags | IP_MASQ_USER_F_LISTEN) { + if ((ums->saddr == 0) || (ums->sport == 0)) { + return EINVAL; + } + mflags |= (IP_MASQ_F_NO_DPORT|IP_MASQ_F_NO_DADDR); + + } + + if ((ret = ip_masq_user_maddr(ums)) < 0) { + return -ret; + } + + mflags |= IP_MASQ_F_USER; + ms = ip_masq_new(ums->protocol, + ums->maddr, ums->mport, + ums->saddr, ums->sport, + ums->daddr, ums->dport, + mflags); + + if (ms == NULL) { + /* + * FIXME: ip_masq_new() should return errno + */ + return EBUSY; + } + + /* + * Setup timeouts for this new entry + */ + + if (ums->timeout) { + ms->timeout = ums->timeout; + } else if (ums->flags | IP_MASQ_USER_F_LISTEN) { + ip_masq_listen(ms); + } + + masq_user_k2u(ms, ums); + ip_masq_put(ms); + return 0; +} + +/* + * Delete existing entry + */ +static int ip_masq_user_del(struct ip_masq_user *ums) +{ + struct ip_masq *ms=NULL; + + if (masq_proto_num (ums->protocol) == -1) { + return EPROTONOSUPPORT; + } + start_bh_atomic(); + if (ums->mport && ums->maddr) { + ms = ip_masq_in_get(ums->protocol, + ums->daddr, ums->dport, + ums->maddr, ums->mport); + end_bh_atomic(); + } else if (ums->sport && ums->saddr) { + ms = ip_masq_out_get(ums->protocol, + ums->saddr, ums->sport, + ums->daddr, ums->dport); + end_bh_atomic(); + } else + return EINVAL; + + if (ms == NULL) { + return ESRCH; + } + + /* + * got (locked) entry, setup almost tiny timeout :) and + * give away + * + * FIXME: should use something better than S_CLOSE + */ + ms->timeout = IP_MASQ_S_CLOSE; + + masq_user_k2u(ms, ums); + ip_masq_put(ms); + return 0; +} + +static struct ip_masq * ip_masq_user_locked_get (struct ip_masq_user *ums, int *err) +{ + struct ip_masq *ms=NULL; + if (masq_proto_num (ums->protocol) == -1) { + *err = EPROTONOSUPPORT; + } + + start_bh_atomic(); + if (ums->mport && ums->maddr) { + ms = ip_masq_in_get(ums->protocol, + ums->daddr, ums->dport, + ums->maddr, ums->mport); + end_bh_atomic(); + } else if (ums->sport && ums->saddr) { + ms = ip_masq_out_get(ums->protocol, + ums->saddr, ums->sport, + ums->daddr, ums->dport); + end_bh_atomic(); + } else + *err = EINVAL; + + if (ms == NULL) *err = ESRCH; + return ms; +} + +/* + * Get existing entry (complete full tunnel info) + */ +static int ip_masq_user_get(struct ip_masq_user *ums) +{ + struct ip_masq *ms=NULL; + int err; + + ms = ip_masq_user_locked_get(ums, &err); + if (ms == NULL) + return err; + + masq_user_k2u(ms, ums); + + ip_masq_put(ms); + return 0; +} + +/* + * Set (some, valid) entry 
parameters + */ +static int ip_masq_user_set(struct ip_masq_user *ums) +{ + struct ip_masq *ms = NULL; + int err; + + ms = ip_masq_user_locked_get(ums, &err); + if (ms == NULL) + return err; + + /* + * FIXME: must allow selecting what you want to set + */ + ms->timeout = ums->timeout; + + masq_user_k2u(ms, ums); + + ip_masq_put(ms); + return 0; +} + + +/* + * Entry point + * ret value: + * <0 err + * ==0 ok + * >0 ok, copy to user + */ +static int ip_masq_user_ctl(int optname, struct ip_masq_ctl *mctl, int optlen) +{ + struct ip_masq_user *ums = &mctl->u.user; + int ret = EINVAL; + int arglen = optlen - IP_MASQ_CTL_BSIZE; + int cmd; + + IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n", + arglen, + sizeof (*ums), + optlen, + sizeof (*mctl)); + + /* + * Yes, I'm a bad guy ... + */ + if (arglen != sizeof(*ums) && optlen != sizeof(*mctl)) + return EINVAL; + + MOD_INC_USE_COUNT; + + /* + * Don't trust the lusers - plenty of error checking! + */ + cmd = mctl->m_cmd; + IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(cmd=%d)\n", cmd); + + switch (mctl->m_cmd) { + case IP_MASQ_CMD_ADD: + case IP_MASQ_CMD_INSERT: + ret = ip_masq_user_new(ums); + break; + case IP_MASQ_CMD_DEL: + ret = ip_masq_user_del(ums); + break; + case IP_MASQ_CMD_SET: + ret = ip_masq_user_set(ums); + break; + case IP_MASQ_CMD_GET: + ret = ip_masq_user_get(ums); + break; + } + + /* + * For all of the above, return masq tunnel info + */ + + ret = -ret; + + if (ret == 0) { + ret = sizeof (*ums) + IP_MASQ_CTL_BSIZE; + IP_MASQ_DEBUG(1-debug, "will return %d bytes to user\n", ret); + } + + MOD_DEC_USE_COUNT; + return ret; +} + + +#ifdef CONFIG_PROC_FS +static int ip_masq_user_info(char *buffer, char **start, off_t offset, + int length, int proto) +{ + off_t pos=0, begin; + struct ip_masq *ms; + char temp[129]; + int idx = 0; + int col; + int len=0; + int magic_control; + struct list_head *l,*e; + + MOD_INC_USE_COUNT; + + IP_MASQ_DEBUG(1-debug, "Entered user_info with proto=%d\n", proto); + + if (offset < 128) + { + sprintf(temp, + "Prot SrcIP SPrt DstIP DPrt MAddr MPrt State Flgs Ref Ctl Expires HRow HCol (free=%d,%d,%d)", + atomic_read(ip_masq_free_ports), + atomic_read(ip_masq_free_ports+1), + atomic_read(ip_masq_free_ports+2)); + len = sprintf(buffer, "%-127s\n", temp); + } + pos = 128; + + for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++) + { + /* + * Lock is actually only need in next loop + * we are called from uspace: must stop bh. 
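+ * read_lock_bh() is used because the masquerading table is also
+ * touched from bottom-half (packet) context.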
+ */ + col=0; + read_lock_bh(&__ip_masq_lock); + l = &ip_masq_m_table[idx]; + for (e=l->next; e!=l; e=e->next) { + col++; + ms = list_entry(e, struct ip_masq, m_list); + if (ms->protocol != proto) { + continue; + } + + pos += 128; + if (pos <= offset) { + len = 0; + continue; + } + + /* + * We have locked the tables, no need to del/add timers + * nor cli() 8) + */ + + + magic_control = atomic_read(&ms->n_control); + if (!magic_control && ms->control) magic_control = -1; + sprintf(temp,"%-4s %08lX:%04X %08lX:%04X %08lX:%04X %-12s %3X %4d %3d %7lu %4d %4d", + masq_proto_name(ms->protocol), + ntohl(ms->saddr), ntohs(ms->sport), + ntohl(ms->daddr), ntohs(ms->dport), + ntohl(ms->maddr), ntohs(ms->mport), + ip_masq_state_name(ms->state), + ms->flags, + atomic_read(&ms->refcnt), + magic_control, + (ms->timer.expires-jiffies)/HZ, + idx, col); + len += sprintf(buffer+len, "%-127s\n", temp); + + if(len >= length) { + read_unlock_bh(&__ip_masq_lock); + goto done; + } + } + read_unlock_bh(&__ip_masq_lock); + } + +done: + + if (len) { + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + } + if(len>length) + len = length; + MOD_DEC_USE_COUNT; + return len; +} +#else +#define ip_masq_user_info NULL +#endif + +static struct ip_masq_hook ip_masq_user = { + ip_masq_user_ctl, + ip_masq_user_info +}; + +int ip_masq_user_init(void) +{ + if (ip_masq_user_hook != NULL) + return -EEXIST; + ip_masq_user_hook = &ip_masq_user; + return 0; +} + +int ip_masq_user_done(void) +{ + if (ip_masq_user_hook == NULL) + return ENOENT; + ip_masq_user_hook = NULL; + return 0; +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; +int init_module(void) +{ + if (ip_masq_user_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_user_done() != 0) + printk(KERN_INFO "ip_masq_user_done(): can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_masq_vdolive.c b/pfinet/linux-src/net/ipv4/ip_masq_vdolive.c new file mode 100644 index 00000000..4724e3b9 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_masq_vdolive.c @@ -0,0 +1,294 @@ +/* + * IP_MASQ_VDOLIVE - VDO Live masquerading module + * + * + * Version: @(#)$Id: ip_masq_vdolive.c,v 1.4 1998/10/06 04:49:07 davem Exp $ + * + * Author: Nigel Metheringham <Nigel.Metheringham@ThePLAnet.net> + * PLAnet Online Ltd + * + * Fixes: Minor changes for 2.1 by + * Steven Clarke <Steven.Clarke@ThePlanet.Net>, Planet Online Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Thanks: + * Thank you to VDOnet Corporation for allowing me access to + * a protocol description without an NDA. This means that + * this module can be distributed as source - a great help! + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <asm/system.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/ip_masq.h> + +struct vdolive_priv_data { + /* Ports used */ + unsigned short origport; + unsigned short masqport; + /* State of decode */ + unsigned short state; +}; + +/* + * List of ports (up to MAX_MASQ_APP_PORTS) to be handled by helper + * First port is set to the default port. 
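+ *
+ * As with the RealAudio helper, an illustrative load would be
+ * insmod ip_masq_vdolive ports=7000,7010
+ * (the second port is arbitrary), one incarnation per listed TCP port.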
+ */ +static int ports[MAX_MASQ_APP_PORTS] = {7000}; /* I rely on the trailing items being set to zero */ +struct ip_masq_app *masq_incarnations[MAX_MASQ_APP_PORTS]; + +/* + * Debug level + */ +#ifdef CONFIG_IP_MASQ_DEBUG +static int debug=0; +MODULE_PARM(debug, "i"); +#endif + +MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_MASQ_APP_PORTS) "i"); + +static int +masq_vdolive_init_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_INC_USE_COUNT; + if ((ms->app_data = kmalloc(sizeof(struct vdolive_priv_data), + GFP_ATOMIC)) == NULL) + IP_MASQ_DEBUG(1-debug, "VDOlive: No memory for application data\n"); + else + { + struct vdolive_priv_data *priv = + (struct vdolive_priv_data *)ms->app_data; + priv->origport = 0; + priv->masqport = 0; + priv->state = 0; + } + return 0; +} + +static int +masq_vdolive_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms) +{ + MOD_DEC_USE_COUNT; + if (ms->app_data) + kfree_s(ms->app_data, sizeof(struct vdolive_priv_data)); + return 0; +} + +int +masq_vdolive_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *th; + char *data, *data_limit; + unsigned int tagval; /* This should be a 32 bit quantity */ + struct ip_masq *n_ms; + struct vdolive_priv_data *priv = + (struct vdolive_priv_data *)ms->app_data; + + /* This doesn't work at all if no priv data was allocated on startup */ + if (!priv) + return 0; + + /* Everything running correctly already */ + if (priv->state == 3) + return 0; + + skb = *skb_p; + iph = skb->nh.iph; + th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); + data = (char *)&th[1]; + + data_limit = skb->h.raw + skb->len; + + if (data+8 > data_limit) { + IP_MASQ_DEBUG(1-debug, "VDOlive: packet too short for ID %p %p\n", data, data_limit); + return 0; + } + memcpy(&tagval, data+4, 4); + IP_MASQ_DEBUG(1-debug, "VDOlive: packet seen, tag %ld, in initial state %d\n", ntohl(tagval), priv->state); + + /* Check for leading packet ID */ + if ((ntohl(tagval) != 6) && (ntohl(tagval) != 1)) { + IP_MASQ_DEBUG(1-debug, "VDOlive: unrecognised tag %ld, in initial state %d\n", ntohl(tagval), priv->state); + return 0; + } + + + /* Check packet is long enough for data - ignore if not */ + if ((ntohl(tagval) == 6) && (data+36 > data_limit)) { + IP_MASQ_DEBUG(1-debug, "VDOlive: initial packet too short %p %p\n", data, data_limit); + return 0; + } else if ((ntohl(tagval) == 1) && (data+20 > data_limit)) { + IP_MASQ_DEBUG(1-debug,"VDOlive: secondary packet too short %p %p\n", data, data_limit); + return 0; + } + + /* Adjust data pointers */ + /* + * I could check the complete protocol version tag + * in here however I am just going to look for the + * "VDO Live" tag in the hope that this part will + * remain constant even if the version changes + */ + if (ntohl(tagval) == 6) { + data += 24; + IP_MASQ_DEBUG(1-debug, "VDOlive: initial packet found\n"); + } else { + data += 8; + IP_MASQ_DEBUG(1-debug, "VDOlive: secondary packet found\n"); + } + + if (memcmp(data, "VDO Live", 8) != 0) { + IP_MASQ_DEBUG(1-debug,"VDOlive: did not find tag\n"); + return 0; + } + /* + * The port number is the next word after the tag. 
+ * VDOlive encodes all of these values + * in 32 bit words, so in this case I am + * skipping the first 2 bytes of the next + * word to get to the relevant 16 bits + */ + data += 10; + + /* + * If we have not seen the port already, + * set the masquerading tunnel up + */ + if (!priv->origport) { + memcpy(&priv->origport, data, 2); + IP_MASQ_DEBUG(1-debug, "VDOlive: found port %d\n", ntohs(priv->origport)); + + /* Open up a tunnel */ + n_ms = ip_masq_new(IPPROTO_UDP, + maddr, 0, + ms->saddr, priv->origport, + ms->daddr, 0, + IP_MASQ_F_NO_DPORT); + + if (n_ms==NULL) { + ip_masq_put(n_ms); + IP_MASQ_DEBUG(1-debug, "VDOlive: unable to build UDP tunnel for %x:%x\n", ms->saddr, priv->origport); + /* Leave state as unset */ + priv->origport = 0; + return 0; + } + ip_masq_listen(n_ms); + + ip_masq_put(ms); + priv->masqport = n_ms->mport; + } else if (memcmp(data, &(priv->origport), 2)) { + IP_MASQ_DEBUG(1-debug, "VDOlive: ports do not match\n"); + /* Write the port in anyhow!!! */ + } + + /* + * Write masq port into packet + */ + memcpy(data, &(priv->masqport), 2); + IP_MASQ_DEBUG(1-debug, "VDOlive: rewrote port %d to %d, server %08X\n", ntohs(priv->origport), ntohs(priv->masqport), ms->saddr); + + /* + * Set state bit to make which bit has been done + */ + + priv->state |= (ntohl(tagval) == 6) ? 1 : 2; + + return 0; +} + + +struct ip_masq_app ip_masq_vdolive = { + NULL, /* next */ + "VDOlive", /* name */ + 0, /* type */ + 0, /* n_attach */ + masq_vdolive_init_1, /* ip_masq_init_1 */ + masq_vdolive_done_1, /* ip_masq_done_1 */ + masq_vdolive_out, /* pkt_out */ + NULL /* pkt_in */ +}; + +/* + * ip_masq_vdolive initialization + */ + +__initfunc(int ip_masq_vdolive_init(void)) +{ + int i, j; + + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (ports[i]) { + if ((masq_incarnations[i] = kmalloc(sizeof(struct ip_masq_app), + GFP_KERNEL)) == NULL) + return -ENOMEM; + memcpy(masq_incarnations[i], &ip_masq_vdolive, sizeof(struct ip_masq_app)); + if ((j = register_ip_masq_app(masq_incarnations[i], + IPPROTO_TCP, + ports[i]))) { + return j; + } + IP_MASQ_DEBUG(1-debug, "RealAudio: loaded support on port[%d] = %d\n", i, ports[i]); + } else { + /* To be safe, force the incarnation table entry to NULL */ + masq_incarnations[i] = NULL; + } + } + return 0; +} + +/* + * ip_masq_vdolive fin. + */ + +int ip_masq_vdolive_done(void) +{ + int i, j, k; + + k=0; + for (i=0; (i<MAX_MASQ_APP_PORTS); i++) { + if (masq_incarnations[i]) { + if ((j = unregister_ip_masq_app(masq_incarnations[i]))) { + k = j; + } else { + kfree(masq_incarnations[i]); + masq_incarnations[i] = NULL; + IP_MASQ_DEBUG(1-debug,"VDOlive: unloaded support on port[%d] = %d\n", i, ports[i]); + } + } + } + return k; +} + + +#ifdef MODULE +EXPORT_NO_SYMBOLS; + +int init_module(void) +{ + if (ip_masq_vdolive_init() != 0) + return -EIO; + return 0; +} + +void cleanup_module(void) +{ + if (ip_masq_vdolive_done() != 0) + IP_MASQ_DEBUG(1-debug, "ip_masq_vdolive: can't remove module"); +} + +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv4/ip_nat_dumb.c b/pfinet/linux-src/net/ipv4/ip_nat_dumb.c new file mode 100644 index 00000000..5a1c6d75 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_nat_dumb.c @@ -0,0 +1,158 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Dumb Network Address Translation. 
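+ *
+ * In outline (a reader's paraphrase of the code below; new_s, new_d,
+ * old_s, old_d stand for the rewritten and original addresses): each
+ * translated packet has its addresses replaced by the route's
+ * rt_src_map/rt_dst_map, the IP header checksum is recomputed, and the
+ * TCP/UDP checksum is patched incrementally, roughly
+ *
+ *	check  = csum_tcpudp_magic(new_s, new_d, 0, 0, ~(*cksum));
+ *	*cksum = csum_tcpudp_magic(~old_s, ~old_d, 0, 0, ~check);
+ *
+ * so only the pseudo-header contribution changes and the payload is
+ * never re-summed; UDP's all-zero "no checksum" value is special-cased.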
+ * + * Version: $Id: ip_nat_dumb.c,v 1.8 1999/03/21 05:22:40 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Rani Assaf : A zero checksum is a special case + * only in UDP + * Rani Assaf : Added ICMP messages rewriting + * Rani Assaf : Repaired wrong changes, made by ANK. + * + * + * NOTE: It is just working model of real NAT. + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netdevice.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/firewall.h> +#include <linux/ip_fw.h> +#include <net/checksum.h> +#include <linux/route.h> +#include <net/route.h> +#include <net/ip_fib.h> + + +int +ip_do_nat(struct sk_buff *skb) +{ + struct rtable *rt = (struct rtable*)skb->dst; + struct iphdr *iph = skb->nh.iph; + u32 odaddr = iph->daddr; + u32 osaddr = iph->saddr; + u16 check; + + IPCB(skb)->flags |= IPSKB_TRANSLATED; + + /* Rewrite IP header */ + iph->daddr = rt->rt_dst_map; + iph->saddr = rt->rt_src_map; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + /* If it is the first fragment, rewrite protocol headers */ + + if (!(iph->frag_off & htons(IP_OFFSET))) { + u16 *cksum; + + switch(iph->protocol) { + case IPPROTO_TCP: + cksum = (u16*)&((struct tcphdr*)(((char*)iph) + (iph->ihl<<2)))->check; + if ((u8*)(cksum+1) > skb->tail) + goto truncated; + check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, ~(*cksum)); + *cksum = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check); + break; + case IPPROTO_UDP: + cksum = (u16*)&((struct udphdr*)(((char*)iph) + (iph->ihl<<2)))->check; + if ((u8*)(cksum+1) > skb->tail) + goto truncated; + if ((check = *cksum) != 0) { + check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, ~check); + check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check); + *cksum = check ? 
: 0xFFFF; + } + break; + case IPPROTO_ICMP: + { + struct icmphdr *icmph = (struct icmphdr*)((char*)iph + (iph->ihl<<2)); + struct iphdr *ciph; + u32 idaddr, isaddr; + int updated; + + if ((icmph->type != ICMP_DEST_UNREACH) && + (icmph->type != ICMP_TIME_EXCEEDED) && + (icmph->type != ICMP_PARAMETERPROB)) + break; + + ciph = (struct iphdr *) (icmph + 1); + + if ((u8*)(ciph+1) > skb->tail) + goto truncated; + + isaddr = ciph->saddr; + idaddr = ciph->daddr; + updated = 0; + + if (rt->rt_flags&RTCF_DNAT && ciph->saddr == odaddr) { + ciph->saddr = iph->daddr; + updated = 1; + } + if (rt->rt_flags&RTCF_SNAT) { + if (ciph->daddr != osaddr) { + struct fib_result res; + struct rt_key key; + unsigned flags = 0; + + key.src = ciph->daddr; + key.dst = ciph->saddr; + key.iif = skb->dev->ifindex; + key.oif = 0; +#ifdef CONFIG_IP_ROUTE_TOS + key.tos = RT_TOS(ciph->tos); +#endif +#ifdef CONFIG_IP_ROUTE_FWMARK + key.fwmark = 0; +#endif + /* Use fib_lookup() until we get our own + * hash table of NATed hosts -- Rani + */ + if (fib_lookup(&key, &res) == 0 && res.r) { + ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags); + if (ciph->daddr != idaddr) + updated = 1; + } + } else { + ciph->daddr = iph->saddr; + updated = 1; + } + } + if (updated) { + cksum = &icmph->checksum; + /* Using tcpudp primitive. Why not? */ + check = csum_tcpudp_magic(ciph->saddr, ciph->daddr, 0, 0, ~(*cksum)); + *cksum = csum_tcpudp_magic(~isaddr, ~idaddr, 0, 0, ~check); + } + break; + } + default: + break; + } + } + return 0; + +truncated: + return -EINVAL; +} diff --git a/pfinet/linux-src/net/ipv4/ip_options.c b/pfinet/linux-src/net/ipv4/ip_options.c new file mode 100644 index 00000000..a3d1f0aa --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_options.c @@ -0,0 +1,617 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The options processing module for ip.c + * + * Version: $Id: ip_options.c,v 1.16.2.1 1999/06/02 04:06:19 davem Exp $ + * + * Authors: A.N.Kuznetsov + * + */ + +#include <linux/types.h> +#include <asm/uaccess.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> + +/* + * Write options to IP header, record destination address to + * source route option, address of outgoing interface + * (we should already know it, so that this function is allowed be + * called only after routing decision) and timestamp, + * if we originate this datagram. + * + * daddr is real destination address, next hop is recorded in IP header. + * saddr is address of outgoing interface. 
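+ *
+ * A note on the offset arithmetic below, derived from the RFC 791 option
+ * layout: an option starts with a type byte, a length byte and a 1-based
+ * "pointer" byte, and ip_options_compile() has already advanced that
+ * pointer past the slot it reserved.  So the record-route slot to fill
+ * begins at opt->rr + iph[opt->rr+2] - 5 (pointer stepped by 4, minus one
+ * for the 1-based offset), a timestamp+address pair begins 9 bytes back
+ * from its pointer (8 bytes were reserved), and the real destination of a
+ * source-routed packet is parked in the last slot of the SRR option,
+ * opt->srr + iph[opt->srr+1] - 4, while iph->daddr carries the first hop.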
+ */ + +void ip_options_build(struct sk_buff * skb, struct ip_options * opt, + u32 daddr, struct rtable *rt, int is_frag) +{ + unsigned char * iph = skb->nh.raw; + + memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); + memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); + opt = &(IPCB(skb)->opt); + opt->is_data = 0; + + if (opt->srr) + memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); + + if (!is_frag) { + if (opt->rr_needaddr) + ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); + if (opt->ts_needaddr) + ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); + if (opt->ts_needtime) { + struct timeval tv; + __u32 midtime; + do_gettimeofday(&tv); + midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); + memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); + } + return; + } + if (opt->rr) { + memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]); + opt->rr = 0; + opt->rr_needaddr = 0; + } + if (opt->ts) { + memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]); + opt->ts = 0; + opt->ts_needaddr = opt->ts_needtime = 0; + } +} + +/* + * Provided (sopt, skb) points to received options, + * build in dopt compiled option set appropriate for answering. + * i.e. invert SRR option, copy anothers, + * and grab room in RR/TS options. + * + * NOTE: dopt cannot point to skb. + */ + +int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) +{ + struct ip_options *sopt; + unsigned char *sptr, *dptr; + int soffset, doffset; + int optlen; + u32 daddr; + + memset(dopt, 0, sizeof(struct ip_options)); + + dopt->is_data = 1; + + sopt = &(IPCB(skb)->opt); + + if (sopt->optlen == 0) { + dopt->optlen = 0; + return 0; + } + + sptr = skb->nh.raw; + dptr = dopt->__data; + + if (skb->dst) + daddr = ((struct rtable*)skb->dst)->rt_spec_dst; + else + daddr = skb->nh.iph->daddr; + + if (sopt->rr) { + optlen = sptr[sopt->rr+1]; + soffset = sptr[sopt->rr+2]; + dopt->rr = dopt->optlen + sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->rr, optlen); + if (sopt->rr_needaddr && soffset <= optlen) { + if (soffset + 3 > optlen) + return -EINVAL; + dptr[2] = soffset + 4; + dopt->rr_needaddr = 1; + } + dptr += optlen; + dopt->optlen += optlen; + } + if (sopt->ts) { + optlen = sptr[sopt->ts+1]; + soffset = sptr[sopt->ts+2]; + dopt->ts = dopt->optlen + sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->ts, optlen); + if (soffset <= optlen) { + if (sopt->ts_needaddr) { + if (soffset + 3 > optlen) + return -EINVAL; + dopt->ts_needaddr = 1; + soffset += 4; + } + if (sopt->ts_needtime) { + if (soffset + 3 > optlen) + return -EINVAL; + if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) { + dopt->ts_needtime = 1; + soffset += 4; + } else { + dopt->ts_needtime = 0; + + if (soffset + 8 <= optlen) { + __u32 addr; + + memcpy(&addr, sptr+soffset-1, 4); + if (inet_addr_type(addr) != RTN_LOCAL) { + dopt->ts_needtime = 1; + soffset += 8; + } + } + } + } + dptr[2] = soffset; + } + dptr += optlen; + dopt->optlen += optlen; + } + if (sopt->srr) { + unsigned char * start = sptr+sopt->srr; + u32 faddr; + + optlen = start[1]; + soffset = start[2]; + doffset = 0; + if (soffset > optlen) + soffset = optlen + 1; + soffset -= 4; + if (soffset > 3) { + memcpy(&faddr, &start[soffset-1], 4); + for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) + memcpy(&dptr[doffset-1], &start[soffset-1], 4); + /* + * RFC1812 requires to fix illegal source routes. 
+ */ + if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0) + doffset -= 4; + } + if (doffset > 3) { + memcpy(&start[doffset-1], &daddr, 4); + dopt->faddr = faddr; + dptr[0] = start[0]; + dptr[1] = doffset+3; + dptr[2] = 4; + dptr += doffset+3; + dopt->srr = dopt->optlen + sizeof(struct iphdr); + dopt->optlen += doffset+3; + dopt->is_strictroute = sopt->is_strictroute; + } + } + while (dopt->optlen & 3) { + *dptr++ = IPOPT_END; + dopt->optlen++; + } + return 0; +} + +/* + * Options "fragmenting", just fill options not + * allowed in fragments with NOOPs. + * Simple and stupid 8), but the most efficient way. + */ + +void ip_options_fragment(struct sk_buff * skb) +{ + unsigned char * optptr = skb->nh.raw; + struct ip_options * opt = &(IPCB(skb)->opt); + int l = opt->optlen; + int optlen; + + while (l > 0) { + switch (*optptr) { + case IPOPT_END: + return; + case IPOPT_NOOP: + l--; + optptr++; + continue; + } + optlen = optptr[1]; + if (optlen<2 || optlen>l) + return; + if (!IPOPT_COPIED(*optptr)) + memset(optptr, IPOPT_NOOP, optlen); + l -= optlen; + optptr += optlen; + } + opt->ts = 0; + opt->rr = 0; + opt->rr_needaddr = 0; + opt->ts_needaddr = 0; + opt->ts_needtime = 0; + return; +} + +/* + * Verify options and fill pointers in struct options. + * Caller should clear *opt, and set opt->data. + * If opt == NULL, then skb->data should point to IP header. + */ + +int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) +{ + int l; + unsigned char * iph; + unsigned char * optptr; + int optlen; + unsigned char * pp_ptr = NULL; + struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; + + if (!opt) { + opt = &(IPCB(skb)->opt); + memset(opt, 0, sizeof(struct ip_options)); + iph = skb->nh.raw; + opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); + optptr = iph + sizeof(struct iphdr); + opt->is_data = 0; + } else { + optptr = opt->is_data ? 
opt->__data : (unsigned char*)&(skb->nh.iph[1]); + iph = optptr - sizeof(struct iphdr); + } + + for (l = opt->optlen; l > 0; ) { + switch (*optptr) { + case IPOPT_END: + for (optptr++, l--; l>0; l--) { + if (*optptr != IPOPT_END) { + *optptr = IPOPT_END; + opt->is_changed = 1; + } + } + goto eol; + case IPOPT_NOOP: + l--; + optptr++; + continue; + } + optlen = optptr[1]; + if (optlen<2 || optlen>l) { + pp_ptr = optptr; + goto error; + } + switch (*optptr) { + case IPOPT_SSRR: + case IPOPT_LSRR: + if (optlen < 3) { + pp_ptr = optptr + 1; + goto error; + } + if (optptr[2] < 4) { + pp_ptr = optptr + 2; + goto error; + } + /* NB: cf RFC-1812 5.2.4.1 */ + if (opt->srr) { + pp_ptr = optptr; + goto error; + } + if (!skb) { + if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) { + pp_ptr = optptr + 1; + goto error; + } + memcpy(&opt->faddr, &optptr[3], 4); + if (optlen > 7) + memmove(&optptr[3], &optptr[7], optlen-7); + } + opt->is_strictroute = (optptr[0] == IPOPT_SSRR); + opt->srr = optptr - iph; + break; + case IPOPT_RR: + if (opt->rr) { + pp_ptr = optptr; + goto error; + } + if (optlen < 3) { + pp_ptr = optptr + 1; + goto error; + } + if (optptr[2] < 4) { + pp_ptr = optptr + 2; + goto error; + } + if (optptr[2] <= optlen) { + if (optptr[2]+3 > optlen) { + pp_ptr = optptr + 2; + goto error; + } + if (skb) { + memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + opt->is_changed = 1; + } + optptr[2] += 4; + opt->rr_needaddr = 1; + } + opt->rr = optptr - iph; + break; + case IPOPT_TIMESTAMP: + if (opt->ts) { + pp_ptr = optptr; + goto error; + } + if (optlen < 4) { + pp_ptr = optptr + 1; + goto error; + } + if (optptr[2] < 5) { + pp_ptr = optptr + 2; + goto error; + } + if (optptr[2] <= optlen) { + __u32 * timeptr = NULL; + if (optptr[2]+3 > optptr[1]) { + pp_ptr = optptr + 2; + goto error; + } + switch (optptr[3]&0xF) { + case IPOPT_TS_TSONLY: + opt->ts = optptr - iph; + if (skb) + timeptr = (__u32*)&optptr[optptr[2]-1]; + opt->ts_needtime = 1; + optptr[2] += 4; + break; + case IPOPT_TS_TSANDADDR: + if (optptr[2]+7 > optptr[1]) { + pp_ptr = optptr + 2; + goto error; + } + opt->ts = optptr - iph; + if (skb) { + memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + timeptr = (__u32*)&optptr[optptr[2]+3]; + } + opt->ts_needaddr = 1; + opt->ts_needtime = 1; + optptr[2] += 8; + break; + case IPOPT_TS_PRESPEC: + if (optptr[2]+7 > optptr[1]) { + pp_ptr = optptr + 2; + goto error; + } + opt->ts = optptr - iph; + { + u32 addr; + memcpy(&addr, &optptr[optptr[2]-1], 4); + if (inet_addr_type(addr) == RTN_UNICAST) + break; + if (skb) + timeptr = (__u32*)&optptr[optptr[2]+3]; + } + opt->ts_needtime = 1; + optptr[2] += 8; + break; + default: + if (!skb && !capable(CAP_NET_RAW)) { + pp_ptr = optptr + 3; + goto error; + } + break; + } + if (timeptr) { + struct timeval tv; + __u32 midtime; + do_gettimeofday(&tv); + midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); + memcpy(timeptr, &midtime, sizeof(__u32)); + opt->is_changed = 1; + } + } else { + unsigned overflow = optptr[3]>>4; + if (overflow == 15) { + pp_ptr = optptr + 3; + goto error; + } + opt->ts = optptr - iph; + if (skb) { + optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); + opt->is_changed = 1; + } + } + break; + case IPOPT_RA: + if (optlen < 4) { + pp_ptr = optptr + 1; + goto error; + } + if (optptr[2] == 0 && optptr[3] == 0) + opt->router_alert = optptr - iph; + break; + case IPOPT_SEC: + case IPOPT_SID: + default: + if (!skb && !capable(CAP_NET_RAW)) { + pp_ptr = optptr; + goto error; + } + break; + } + l -= optlen; + optptr += optlen; + 
} + +eol: + if (!pp_ptr) + return 0; + +error: + if (skb) { + icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); + } + return -EINVAL; +} + + +/* + * Undo all the changes done by ip_options_compile(). + */ + +void ip_options_undo(struct ip_options * opt) +{ + if (opt->srr) { + unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); + memmove(optptr+7, optptr+3, optptr[1]-7); + memcpy(optptr+3, &opt->faddr, 4); + } + if (opt->rr_needaddr) { + unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); + optptr[2] -= 4; + memset(&optptr[optptr[2]-1], 0, 4); + } + if (opt->ts) { + unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); + if (opt->ts_needtime) { + optptr[2] -= 4; + memset(&optptr[optptr[2]-1], 0, 4); + if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC) + optptr[2] -= 4; + } + if (opt->ts_needaddr) { + optptr[2] -= 4; + memset(&optptr[optptr[2]-1], 0, 4); + } + } +} + +int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user) +{ + struct ip_options *opt; + + opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL); + if (!opt) + return -ENOMEM; + memset(opt, 0, sizeof(struct ip_options)); + if (optlen) { + if (user) { + if (copy_from_user(opt->__data, data, optlen)) + return -EFAULT; + } else + memcpy(opt->__data, data, optlen); + } + while (optlen & 3) + opt->__data[optlen++] = IPOPT_END; + opt->optlen = optlen; + opt->is_data = 1; + opt->is_setbyuser = 1; + if (optlen && ip_options_compile(opt, NULL)) { + kfree_s(opt, sizeof(struct ip_options) + optlen); + return -EINVAL; + } + *optp = opt; + return 0; +} + +void ip_forward_options(struct sk_buff *skb) +{ + struct ip_options * opt = &(IPCB(skb)->opt); + unsigned char * optptr; + struct rtable *rt = (struct rtable*)skb->dst; + unsigned char *raw = skb->nh.raw; + + if (opt->rr_needaddr) { + optptr = (unsigned char *)raw + opt->rr; + ip_rt_get_source(&optptr[optptr[2]-5], rt); + opt->is_changed = 1; + } + if (opt->srr_is_hit) { + int srrptr, srrspace; + + optptr = raw + opt->srr; + + for ( srrptr=optptr[2], srrspace = optptr[1]; + srrptr <= srrspace; + srrptr += 4 + ) { + if (srrptr + 3 > srrspace) + break; + if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) + break; + } + if (srrptr + 3 <= srrspace) { + opt->is_changed = 1; + ip_rt_get_source(&optptr[srrptr-1], rt); + skb->nh.iph->daddr = rt->rt_dst; + optptr[2] = srrptr+4; + } else + printk(KERN_CRIT "ip_forward(): Argh! 
Destination lost!\n"); + if (opt->ts_needaddr) { + optptr = raw + opt->ts; + ip_rt_get_source(&optptr[optptr[2]-9], rt); + opt->is_changed = 1; + } + } + if (opt->is_changed) { + opt->is_changed = 0; + ip_send_check(skb->nh.iph); + } +} + +int ip_options_rcv_srr(struct sk_buff *skb) +{ + struct ip_options *opt = &(IPCB(skb)->opt); + int srrspace, srrptr; + u32 nexthop; + struct iphdr *iph = skb->nh.iph; + unsigned char * optptr = skb->nh.raw + opt->srr; + struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt2; + int err; + + if (!opt->srr) + return 0; + + if (skb->pkt_type != PACKET_HOST) + return -EINVAL; + if (rt->rt_type == RTN_UNICAST) { + if (!opt->is_strictroute) + return 0; + icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24)); + return -EINVAL; + } + if (rt->rt_type != RTN_LOCAL) + return -EINVAL; + + for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { + if (srrptr + 3 > srrspace) { + icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); + return -EINVAL; + } + memcpy(&nexthop, &optptr[srrptr-1], 4); + + rt = (struct rtable*)skb->dst; + skb->dst = NULL; + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + rt2 = (struct rtable*)skb->dst; + if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { + ip_rt_put(rt2); + skb->dst = &rt->u.dst; + return -EINVAL; + } + ip_rt_put(rt); + if (rt2->rt_type != RTN_LOCAL) + break; + /* Superfast 8) loopback forward */ + memcpy(&iph->daddr, &optptr[srrptr-1], 4); + opt->is_changed = 1; + } + if (srrptr <= srrspace) { + opt->srr_is_hit = 1; + opt->is_changed = 1; + } + return 0; +} diff --git a/pfinet/linux-src/net/ipv4/ip_output.c b/pfinet/linux-src/net/ipv4/ip_output.c new file mode 100644 index 00000000..44d63557 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_output.c @@ -0,0 +1,992 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The Internet Protocol (IP) output module. + * + * Version: $Id: ip_output.c,v 1.67 1999/03/25 00:43:00 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Donald Becker, <becker@super.org> + * Alan Cox, <Alan.Cox@linux.org> + * Richard Underwood + * Stefan Becker, <stefanb@yello.ping.de> + * Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * + * See ip_input.c for original log + * + * Fixes: + * Alan Cox : Missing nonblock feature in ip_build_xmit. + * Mike Kilburn : htons() missing in ip_build_xmit. + * Bradford Johnson: Fix faulty handling of some frames when + * no route is found. + * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit + * (in case if packet not accepted by + * output firewall rules) + * Mike McLagan : Routing by source + * Alexey Kuznetsov: use new route cache + * Andi Kleen: Fix broken PMTU recovery and remove + * some redundant tests. + * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Andi Kleen : Replace ip_reply with ip_send_reply. + * Andi Kleen : Split fast and slow ip_build_xmit path + * for decreased register pressure on x86 + * and more readibility. + * Marc Boucher : When call_out_firewall returns FW_QUEUE, + * silently drop skb instead of failing with -EPERM. 
+ */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> + +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/icmp.h> +#include <net/raw.h> +#include <net/checksum.h> +#include <linux/igmp.h> +#include <linux/ip_fw.h> +#include <linux/firewall.h> +#include <linux/mroute.h> +#include <linux/netlink.h> + +/* + * Shall we try to damage output packets if routing dev changes? + */ + +int sysctl_ip_dynaddr = 0; + + +int ip_id_count = 0; + +/* Generate a checksum for an outgoing IP datagram. */ +__inline__ void ip_send_check(struct iphdr *iph) +{ + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); +} + +/* + * Add an ip header to a skbuff and send it out. + */ +void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, + u32 saddr, u32 daddr, struct ip_options *opt) +{ + struct rtable *rt = (struct rtable *)skb->dst; + struct iphdr *iph; + struct device *dev; + + /* Build the IP header. */ + if (opt) + iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); + else + iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr)); + + iph->version = 4; + iph->ihl = 5; + iph->tos = sk->ip_tos; + iph->frag_off = 0; + if (ip_dont_fragment(sk, &rt->u.dst)) + iph->frag_off |= htons(IP_DF); + iph->ttl = sk->ip_ttl; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->protocol = sk->protocol; + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + skb->nh.iph = iph; + + if (opt && opt->optlen) { + iph->ihl += opt->optlen>>2; + ip_options_build(skb, opt, daddr, rt, 0); + } + + dev = rt->u.dst.dev; + +#ifdef CONFIG_FIREWALL + /* Now we have no better mechanism to notify about error. */ + switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) { + case FW_REJECT: + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + /* Fall thru... */ + case FW_BLOCK: + case FW_QUEUE: + kfree_skb(skb); + return; + } +#endif + + ip_send_check(iph); + + /* Send it out. */ + skb->dst->output(skb); + return; +} + +int __ip_finish_output(struct sk_buff *skb) +{ + return ip_finish_output(skb); +} + +int ip_mc_output(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct rtable *rt = (struct rtable*)skb->dst; + struct device *dev = rt->u.dst.dev; + + /* + * If the indicated interface is up and running, send the packet. + */ + + ip_statistics.IpOutRequests++; +#ifdef CONFIG_IP_ROUTE_NAT + if (rt->rt_flags & RTCF_NAT) + ip_do_nat(skb); +#endif + + skb->dev = dev; + skb->protocol = __constant_htons(ETH_P_IP); + + /* + * Multicasts are looped back for other local users + */ + + if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) { +#ifdef CONFIG_IP_MROUTE + /* Small optimization: do not loopback not local frames, + which returned after forwarding; they will be dropped + by ip_mr_input in any case. + Note, that local frames are looped back to be delivered + to local recipients. + + This check is duplicated in ip_mr_input at the moment. 
+ */ + if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED)) +#endif + dev_loopback_xmit(skb); + + /* Multicasts with ttl 0 must not go beyond the host */ + + if (skb->nh.iph->ttl == 0) { + kfree_skb(skb); + return 0; + } + } + + if (rt->rt_flags&RTCF_BROADCAST) + dev_loopback_xmit(skb); + + return ip_finish_output(skb); +} + +int ip_output(struct sk_buff *skb) +{ +#ifdef CONFIG_IP_ROUTE_NAT + struct rtable *rt = (struct rtable*)skb->dst; +#endif + + ip_statistics.IpOutRequests++; + +#ifdef CONFIG_IP_ROUTE_NAT + if (rt->rt_flags&RTCF_NAT) + ip_do_nat(skb); +#endif + + return ip_finish_output(skb); +} + +/* Queues a packet to be sent, and starts the transmitter if necessary. + * This routine also needs to put in the total length and compute the + * checksum. We use to do this in two stages, ip_build_header() then + * this, but that scheme created a mess when routes disappeared etc. + * So we do it all here, and the TCP send engine has been changed to + * match. (No more unroutable FIN disasters, etc. wheee...) This will + * most likely make other reliable transport layers above IP easier + * to implement under Linux. + */ +void ip_queue_xmit(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct ip_options *opt = sk->opt; + struct rtable *rt; + struct device *dev; + struct iphdr *iph; + unsigned int tot_len; + + /* Make sure we can route this packet. */ + rt = (struct rtable *) sk->dst_cache; + if(rt == NULL || rt->u.dst.obsolete) { + u32 daddr; + + sk->dst_cache = NULL; + ip_rt_put(rt); + + /* Use correct destination address if we have options. */ + daddr = sk->daddr; + if(opt && opt->srr) + daddr = opt->faddr; + + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times itself + * out. + */ + if(ip_route_output(&rt, daddr, sk->saddr, + RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute, + sk->bound_dev_if)) + goto drop; + sk->dst_cache = &rt->u.dst; + } + if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto no_route; + + /* We have a route, so grab a reference. */ + skb->dst = dst_clone(sk->dst_cache); + + /* OK, we know where to send it, allocate and build IP header. */ + iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + iph->version = 4; + iph->ihl = 5; + iph->tos = sk->ip_tos; + iph->frag_off = 0; + iph->ttl = sk->ip_ttl; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->protocol = sk->protocol; + skb->nh.iph = iph; + /* Transport layer set skb->h.foo itself. */ + + if(opt && opt->optlen) { + iph->ihl += opt->optlen >> 2; + ip_options_build(skb, opt, sk->daddr, rt, 0); + } + + tot_len = skb->len; + iph->tot_len = htons(tot_len); + iph->id = htons(ip_id_count++); + + dev = rt->u.dst.dev; + +#ifdef CONFIG_FIREWALL + /* Now we have no better mechanism to notify about error. */ + switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) { + case FW_REJECT: + start_bh_atomic(); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + end_bh_atomic(); + /* Fall thru... */ + case FW_BLOCK: + case FW_QUEUE: + goto drop; + } +#endif + + /* This can happen when the transport layer has segments queued + * with a cached route, and by the time we get here things are + * re-routed to a device with a different MTU than the original + * device. Sick, but we must cover it. 
+ */ + if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) { + struct sk_buff *skb2; + + skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15); + kfree_skb(skb); + if (skb2 == NULL) + return; + if (sk) + skb_set_owner_w(skb, sk); + skb = skb2; + iph = skb->nh.iph; + } + + /* Do we need to fragment. Again this is inefficient. We + * need to somehow lock the original buffer and use bits of it. + */ + if (tot_len > rt->u.dst.pmtu) + goto fragment; + + if (ip_dont_fragment(sk, &rt->u.dst)) + iph->frag_off |= __constant_htons(IP_DF); + + /* Add an IP checksum. */ + ip_send_check(iph); + + skb->priority = sk->priority; + skb->dst->output(skb); + return; + +fragment: + if (ip_dont_fragment(sk, &rt->u.dst) && + tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) { + /* Reject packet ONLY if TCP might fragment + it itself, if were careful enough. + Test is not precise (f.e. it does not take sacks + into account). Actually, tcp should make it. --ANK (980801) + */ + iph->frag_off |= __constant_htons(IP_DF); + NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n")); + + /* icmp_send is not reenterable, so that bh_atomic... --ANK */ + start_bh_atomic(); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(rt->u.dst.pmtu)); + end_bh_atomic(); + goto drop; + } + ip_fragment(skb, skb->dst->output); + return; + +no_route: + sk->dst_cache = NULL; + ip_rt_put(rt); + ip_statistics.IpOutNoRoutes++; + /* Fall through... */ +drop: + kfree_skb(skb); +} + +/* + * Build and send a packet, with as little as one copy + * + * Doesn't care much about ip options... option length can be + * different for fragment at 0 and other fragments. + * + * Note that the fragment at the highest offset is sent first, + * so the getfrag routine can fill in the TCP/UDP checksum header + * field in the last fragment it sends... actually it also helps + * the reassemblers, they can put most packets in at the head of + * the fragment queue, and they know the total size in advance. This + * last feature will measurably improve the Linux fragment handler one + * day. + * + * The callback has five args, an arbitrary pointer (copy of frag), + * the source IP address (may depend on the routing table), the + * destination address (char *), the offset to copy from, and the + * length to be copied. + */ + +int ip_build_xmit_slow(struct sock *sk, + int getfrag (const void *, + char *, + unsigned int, + unsigned int), + const void *frag, + unsigned length, + struct ipcm_cookie *ipc, + struct rtable *rt, + int flags) +{ + unsigned int fraglen, maxfraglen, fragheaderlen; + int err; + int offset, mf; + int mtu; + unsigned short id; + + int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15; + int nfrags=0; + struct ip_options *opt = ipc->opt; + int df = 0; + + mtu = rt->u.dst.pmtu; + if (ip_dont_fragment(sk, &rt->u.dst)) + df = htons(IP_DF); + + length -= sizeof(struct iphdr); + + if (opt) { + fragheaderlen = sizeof(struct iphdr) + opt->optlen; + maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen; + } else { + fragheaderlen = sizeof(struct iphdr); + + /* + * Fragheaderlen is the size of 'overhead' on each buffer. Now work + * out the size of the frames to send. + */ + + maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen; + } + + if (length + fragheaderlen > 0xFFFF) { + ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu); + return -EMSGSIZE; + } + + /* + * Start at the end of the frame by handling the remainder. 
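+ *
+ * Worked example (sizes invented): with a 1500 byte path MTU and no IP
+ * options, maxfraglen - fragheaderlen is 1480, so a 4000 byte payload
+ * gives offset = 4000 - (4000 % 1480) = 2960 and a first buffer of
+ * 1040 + 20 = 1060 bytes -- the tail fragment really is built and sent
+ * first.  When the payload is an exact multiple of 1480, the fix-up a
+ * few lines below steps offset back by one full fragment instead.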
+ */ + + offset = length - (length % (maxfraglen - fragheaderlen)); + + /* + * Amount of memory to allocate for final fragment. + */ + + fraglen = length - offset + fragheaderlen; + + if (length-offset==0) { + fraglen = maxfraglen; + offset -= maxfraglen-fragheaderlen; + } + + + /* + * The last fragment will not have MF (more fragments) set. + */ + + mf = 0; + + /* + * Don't fragment packets for path mtu discovery. + */ + + if (offset > 0 && df) { + ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu); + return(-EMSGSIZE); + } + + /* + * Lock the device lists. + */ + + dev_lock_list(); + + /* + * Get an identifier + */ + + id = htons(ip_id_count++); + + /* + * Begin outputting the bytes. + */ + + do { + char *data; + struct sk_buff * skb; + + /* + * Get the memory we require with some space left for alignment. + */ + + skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &err); + if (skb == NULL) + goto error; + + /* + * Fill in the control structures + */ + + skb->priority = sk->priority; + skb->dst = dst_clone(&rt->u.dst); + skb_reserve(skb, hh_len); + + /* + * Find where to start putting bytes. + */ + + data = skb_put(skb, fraglen); + skb->nh.iph = (struct iphdr *)data; + + /* + * Only write IP header onto non-raw packets + */ + + { + struct iphdr *iph = (struct iphdr *)data; + + iph->version = 4; + iph->ihl = 5; + if (opt) { + iph->ihl += opt->optlen>>2; + ip_options_build(skb, opt, + ipc->addr, rt, offset); + } + iph->tos = sk->ip_tos; + iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4); + iph->id = id; + iph->frag_off = htons(offset>>3); + iph->frag_off |= mf|df; + if (rt->rt_type == RTN_MULTICAST) + iph->ttl = sk->ip_mc_ttl; + else + iph->ttl = sk->ip_ttl; + iph->protocol = sk->protocol; + iph->check = 0; + iph->saddr = rt->rt_src; + iph->daddr = rt->rt_dst; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + data += iph->ihl*4; + + /* + * Any further fragments will have MF set. + */ + + mf = htons(IP_MF); + } + + /* + * User data callback + */ + + if (getfrag(frag, data, offset, fraglen-fragheaderlen)) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + offset -= (maxfraglen-fragheaderlen); + fraglen = maxfraglen; + + nfrags++; + +#ifdef CONFIG_FIREWALL + switch (call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb)) { + case FW_QUEUE: + kfree_skb(skb); + continue; + case FW_BLOCK: + case FW_REJECT: + kfree_skb(skb); + err = -EPERM; + goto error; + } +#endif + + err = -ENETDOWN; + if (rt->u.dst.output(skb)) + goto error; + } while (offset >= 0); + + if (nfrags>1) + ip_statistics.IpFragCreates += nfrags; + dev_unlock_list(); + return 0; + +error: + ip_statistics.IpOutDiscards++; + if (nfrags>1) + ip_statistics.IpFragCreates += nfrags; + dev_unlock_list(); + return err; +} + + +/* + * Fast path for unfragmented packets. + */ +int ip_build_xmit(struct sock *sk, + int getfrag (const void *, + char *, + unsigned int, + unsigned int), + const void *frag, + unsigned length, + struct ipcm_cookie *ipc, + struct rtable *rt, + int flags) +{ + int err; + struct sk_buff *skb; + int df; + struct iphdr *iph; + + /* + * Try the simple case first. This leaves fragmented frames, and by + * choice RAW frames within 20 bytes of maximum size(rare) to the long path + */ + + if (!sk->ip_hdrincl) { + length += sizeof(struct iphdr); + + /* + * Check for slow path. 
+ */ + if (length > rt->u.dst.pmtu || ipc->opt != NULL) + return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); + } else { + if (length > rt->u.dst.dev->mtu) { + ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu); + return -EMSGSIZE; + } + } + + /* + * Do path mtu discovery if needed. + */ + df = 0; + if (ip_dont_fragment(sk, &rt->u.dst)) + df = htons(IP_DF); + + /* + * Fast path for unfragmented frames without options. + */ + { + int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15; + + skb = sock_alloc_send_skb(sk, length+hh_len+15, + 0, flags&MSG_DONTWAIT, &err); + if(skb==NULL) + goto error; + skb_reserve(skb, hh_len); + } + + skb->priority = sk->priority; + skb->dst = dst_clone(&rt->u.dst); + + skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length); + + dev_lock_list(); + + if(!sk->ip_hdrincl) { + iph->version=4; + iph->ihl=5; + iph->tos=sk->ip_tos; + iph->tot_len = htons(length); + iph->id=htons(ip_id_count++); + iph->frag_off = df; + iph->ttl=sk->ip_mc_ttl; + if (rt->rt_type != RTN_MULTICAST) + iph->ttl=sk->ip_ttl; + iph->protocol=sk->protocol; + iph->saddr=rt->rt_src; + iph->daddr=rt->rt_dst; + iph->check=0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4); + } + else + err = getfrag(frag, (void *)iph, 0, length); + + dev_unlock_list(); + + if (err) + goto error_fault; + +#ifdef CONFIG_FIREWALL + switch (call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb)) { + case FW_QUEUE: + kfree_skb(skb); + return 0; + case FW_BLOCK: + case FW_REJECT: + kfree_skb(skb); + err = -EPERM; + goto error; + } +#endif + + return rt->u.dst.output(skb); + +error_fault: + err = -EFAULT; + kfree_skb(skb); +error: + ip_statistics.IpOutDiscards++; + return err; +} + + + +/* + * This IP datagram is too large to be sent in one piece. Break it up into + * smaller pieces (each of size equal to IP header plus + * a block of the data of the original IP data part) that will yet fit in a + * single device frame, and queue such a frame for sending. + * + * Yes this is inefficient, feel free to submit a quicker one. + */ + +void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +{ + struct iphdr *iph; + unsigned char *raw; + unsigned char *ptr; + struct device *dev; + struct sk_buff *skb2; + unsigned int mtu, hlen, left, len; + int offset; + int not_last_frag; + struct rtable *rt = (struct rtable*)skb->dst; + + dev = rt->u.dst.dev; + + /* + * Point into the IP datagram header. + */ + + raw = skb->nh.raw; + iph = (struct iphdr*)raw; + + /* + * Setup starting values. + */ + + hlen = iph->ihl * 4; + left = ntohs(iph->tot_len) - hlen; /* Space per frame */ + mtu = rt->u.dst.pmtu - hlen; /* Size of data space */ + ptr = raw + hlen; /* Where to start from */ + + /* + * The protocol doesn't seem to say what to do in the case that the + * frame + options doesn't fit the mtu. As it used to fall down dead + * in this case we were fortunate it didn't happen + * + * It is impossible, because mtu>=68. --ANK (980801) + */ + +#ifdef CONFIG_NET_PARANOIA + if (mtu<8) + goto fail; +#endif + + /* + * Fragment the datagram. + */ + + offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; + not_last_frag = iph->frag_off & htons(IP_MF); + + /* + * Keep copying data until we run out. 
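+ *
+ * For instance (sizes invented): a 4020 byte datagram with a 20 byte
+ * header leaving over a 1500 byte path has mtu = 1480 here, so the loop
+ * emits fragments carrying 1480, 1480 and 1040 data bytes at offsets
+ * 0, 1480 and 2960 -- stored as 0, 185 and 370 in the 8-byte units of
+ * frag_off -- with IP_MF set on all but the last (assuming the original
+ * datagram was not itself a fragment).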
+ */ + + while(left > 0) { + len = left; + /* IF: it doesn't fit, use 'mtu' - the data space left */ + if (len > mtu) + len = mtu; + /* IF: we are not sending upto and including the packet end + then align the next start on an eight byte boundary */ + if (len < left) { + len &= ~7; + } + /* + * Allocate buffer. + */ + + if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) { + NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); + goto fail; + } + + /* + * Set up data on packet + */ + + skb2->pkt_type = skb->pkt_type; + skb2->priority = skb->priority; + skb_reserve(skb2, (dev->hard_header_len+15)&~15); + skb_put(skb2, len + hlen); + skb2->nh.raw = skb2->data; + skb2->h.raw = skb2->data + hlen; + + /* + * Charge the memory for the fragment to any owner + * it might possess + */ + + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + skb2->dst = dst_clone(skb->dst); + + /* + * Copy the packet header into the new buffer. + */ + + memcpy(skb2->nh.raw, raw, hlen); + + /* + * Copy a block of the IP datagram. + */ + memcpy(skb2->h.raw, ptr, len); + left -= len; + + /* + * Fill in the new header fields. + */ + iph = skb2->nh.iph; + iph->frag_off = htons((offset >> 3)); + + /* ANK: dirty, but effective trick. Upgrade options only if + * the segment to be fragmented was THE FIRST (otherwise, + * options are already fixed) and make it ONCE + * on the initial skb, so that all the following fragments + * will inherit fixed options. + */ + if (offset == 0) + ip_options_fragment(skb); + + /* + * Added AC : If we are fragmenting a fragment that's not the + * last fragment then keep MF on each bit + */ + if (left > 0 || not_last_frag) + iph->frag_off |= htons(IP_MF); + ptr += len; + offset += len; + + /* + * Put this fragment into the sending queue. + */ + + ip_statistics.IpFragCreates++; + + iph->tot_len = htons(len + hlen); + + ip_send_check(iph); + + output(skb2); + } + kfree_skb(skb); + ip_statistics.IpFragOKs++; + return; + +fail: + kfree_skb(skb); + ip_statistics.IpFragFails++; +} + +/* + * Fetch data from kernel space and fill in checksum if needed. + */ +static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, + unsigned int fraglen) +{ + struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr; + u16 *pktp = (u16 *)to; + struct iovec *iov; + int len; + int hdrflag = 1; + + iov = &dp->iov[0]; + if (offset >= iov->iov_len) { + offset -= iov->iov_len; + iov++; + hdrflag = 0; + } + len = iov->iov_len - offset; + if (fraglen > len) { /* overlapping. */ + dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len, + dp->csum); + offset = 0; + fraglen -= len; + to += len; + iov++; + } + + dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen, + dp->csum); + + if (hdrflag && dp->csumoffset) + *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */ + return 0; +} + +/* + * Generic function to send a packet as reply to another packet. + * Used to send TCP resets so far. ICMP should use this function too. + * + * Should run single threaded per socket because it uses the sock + * structure to pass arguments. 
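+ *
+ * A rough caller sketch, modelled on the TCP reset path (rst_th and
+ * ctl_sk are placeholder names, not the exact identifiers used there):
+ *
+ *	struct ip_reply_arg arg;
+ *	arg.iov[0].iov_base = (unsigned char *) &rst_th;
+ *	arg.iov[0].iov_len  = sizeof(rst_th);
+ *	arg.csum = csum_partial((char *) &rst_th, sizeof(rst_th), 0);
+ *	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+ *	ip_send_reply(ctl_sk, skb, &arg, sizeof(rst_th));
+ *
+ * csumoffset is counted in 16 bit words because ip_reply_glue_bits()
+ * above patches the checksum through a u16 pointer.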
+ */ +void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, + unsigned int len) +{ + struct { + struct ip_options opt; + char data[40]; + } replyopts; + struct ipcm_cookie ipc; + u32 daddr; + struct rtable *rt = (struct rtable*)skb->dst; + + if (ip_options_echo(&replyopts.opt, skb)) + return; + + sk->ip_tos = skb->nh.iph->tos; + sk->priority = skb->priority; + sk->protocol = skb->nh.iph->protocol; + + daddr = ipc.addr = rt->rt_src; + ipc.opt = &replyopts.opt; + + if (ipc.opt->srr) + daddr = replyopts.opt.faddr; + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) + return; + + /* And let IP do all the hard work. */ + ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT); + ip_rt_put(rt); +} + +/* + * IP protocol layer initialiser + */ + +static struct packet_type ip_packet_type = +{ + __constant_htons(ETH_P_IP), + NULL, /* All devices */ + ip_rcv, + NULL, + NULL, +}; + + + +#ifdef CONFIG_PROC_FS +#ifdef CONFIG_IP_MULTICAST +static struct proc_dir_entry proc_net_igmp = { + PROC_NET_IGMP, 4, "igmp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ip_mc_procinfo +}; +#endif +#endif + +/* + * IP registers the packet type and then calls the subprotocol initialisers + */ + +__initfunc(void ip_init(void)) +{ + dev_add_pack(&ip_packet_type); + + ip_rt_init(); + +#ifdef CONFIG_PROC_FS +#ifdef CONFIG_IP_MULTICAST + proc_net_register(&proc_net_igmp); +#endif +#endif +} + diff --git a/pfinet/linux-src/net/ipv4/ip_sockglue.c b/pfinet/linux-src/net/ipv4/ip_sockglue.c new file mode 100644 index 00000000..369a6770 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_sockglue.c @@ -0,0 +1,739 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The IP to API glue. + * + * Version: $Id: ip_sockglue.c,v 1.42 1999/04/22 10:07:34 davem Exp $ + * + * Authors: see ip.c + * + * Fixes: + * Many : Split from ip.c , see ip.c for history. + * Martin Mares : TOS setting fixed. + * Alan Cox : Fixed a couple of oopses in Martin's + * TOS tweaks. + * Mike McLagan : Routing by source + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netdevice.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/igmp.h> +#include <linux/firewall.h> +#include <linux/ip_fw.h> +#include <linux/route.h> +#include <linux/mroute.h> +#include <net/route.h> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include <net/transp_v6.h> +#endif + +#ifdef CONFIG_IP_MASQUERADE +#include <linux/ip_masq.h> +#endif + +#include <linux/errqueue.h> +#include <asm/uaccess.h> + +#define MAX(a,b) ((a)>(b)?(a):(b)) + +#define IP_CMSG_PKTINFO 1 +#define IP_CMSG_TTL 2 +#define IP_CMSG_TOS 4 +#define IP_CMSG_RECVOPTS 8 +#define IP_CMSG_RETOPTS 16 + +/* + * SOL_IP control messages. 
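+ *
+ * Seen from user space these arrive as ancillary data once the matching
+ * socket option is enabled; a minimal, purely illustrative receiver
+ * (msg is assumed to have been set up with an iovec and a control buffer):
+ *
+ *	int on = 1;
+ *	struct cmsghdr *cmsg;
+ *	struct in_pktinfo *pi;
+ *
+ *	setsockopt(fd, SOL_IP, IP_PKTINFO, &on, sizeof(on));
+ *	recvmsg(fd, &msg, 0);
+ *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
+ *		if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_PKTINFO)
+ *			pi = (struct in_pktinfo *) CMSG_DATA(cmsg);
+ *
+ * msg.msg_control must point at a buffer big enough for all enabled
+ * options, otherwise MSG_CTRUNC is reported.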
+ */ + +static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct in_pktinfo info; + struct rtable *rt = (struct rtable *)skb->dst; + + info.ipi_addr.s_addr = skb->nh.iph->daddr; + if (rt) { + info.ipi_ifindex = rt->rt_iif; + info.ipi_spec_dst.s_addr = rt->rt_spec_dst; + } else { + info.ipi_ifindex = 0; + info.ipi_spec_dst.s_addr = 0; + } + + put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); +} + +static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) +{ + int ttl = skb->nh.iph->ttl; + put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); +} + +static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) +{ + put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos); +} + +static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) +{ + if (IPCB(skb)->opt.optlen == 0) + return; + + put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1); +} + + +void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) +{ + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options * opt = (struct ip_options*)optbuf; + + if (IPCB(skb)->opt.optlen == 0) + return; + + if (ip_options_echo(opt, skb)) { + msg->msg_flags |= MSG_CTRUNC; + return; + } + ip_options_undo(opt); + + put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); +} + + +void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +{ + unsigned flags = skb->sk->ip_cmsg_flags; + + /* Ordered by supposed usage frequency */ + if (flags & 1) + ip_cmsg_recv_pktinfo(msg, skb); + if ((flags>>=1) == 0) + return; + + if (flags & 1) + ip_cmsg_recv_ttl(msg, skb); + if ((flags>>=1) == 0) + return; + + if (flags & 1) + ip_cmsg_recv_tos(msg, skb); + if ((flags>>=1) == 0) + return; + + if (flags & 1) + ip_cmsg_recv_opts(msg, skb); + if ((flags>>=1) == 0) + return; + + if (flags & 1) + ip_cmsg_recv_retopts(msg, skb); +} + +int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) +{ + int err; + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_len < sizeof(struct cmsghdr) || + (unsigned long)(((char*)cmsg - (char*)msg->msg_control) + + cmsg->cmsg_len) > msg->msg_controllen) { + return -EINVAL; + } + if (cmsg->cmsg_level != SOL_IP) + continue; + switch (cmsg->cmsg_type) { + case IP_RETOPTS: + err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0); + if (err) + return err; + break; + case IP_PKTINFO: + { + struct in_pktinfo *info; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) + return -EINVAL; + info = (struct in_pktinfo *)CMSG_DATA(cmsg); + ipc->oif = info->ipi_ifindex; + ipc->addr = info->ipi_spec_dst.s_addr; + break; + } + default: + return -EINVAL; + } + } + return 0; +} + + +/* Special input handler for packets catched by router alert option. + They are selected only by protocol field, and then processed likely + local ones; but only if someone wants them! Otherwise, router + not running rsvpd will kill RSVP. + + It is user level problem, what it will make with them. + I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), + but receiver should be enough clever f.e. to forward mtrace requests, + sent to multicast group to reach destination designated router. 
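+ *
+ * Usage sketch (illustrative, not lifted from a particular daemon): an
+ * RSVP daemon opens socket(AF_INET, SOCK_RAW, IPPROTO_RSVP) and sets
+ *
+ *	int on = 1;
+ *	setsockopt(fd, SOL_IP, IP_ROUTER_ALERT, &on, sizeof(on));
+ *
+ * which lands in ip_ra_control() below; from then on the input path hands
+ * that socket every packet of its protocol carrying the router alert
+ * option, whether or not this host is the final destination.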
+ */ +struct ip_ra_chain *ip_ra_chain; + +int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) +{ + struct ip_ra_chain *ra, *new_ra, **rap; + + if (sk->type != SOCK_RAW || sk->num == IPPROTO_RAW) + return -EINVAL; + + new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + + for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + if (ra->sk == sk) { + if (on) { + if (new_ra) + kfree(new_ra); + return -EADDRINUSE; + } + *rap = ra->next; + synchronize_bh(); + + if (ra->destructor) + ra->destructor(sk); + kfree(ra); + return 0; + } + } + if (new_ra == NULL) + return -ENOBUFS; + new_ra->sk = sk; + new_ra->destructor = destructor; + + new_ra->next = ra; + wmb(); + *rap = new_ra; + + return 0; +} + +void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + u16 port, u32 info, u8 *payload) +{ + struct sock_exterr_skb *serr; + + if (!sk->ip_recverr) + return; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return; + + serr = SKB_EXT_ERR(skb); + serr->ee.ee_errno = err; + serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; + serr->ee.ee_type = skb->h.icmph->type; + serr->ee.ee_code = skb->h.icmph->code; + serr->ee.ee_pad = 0; + serr->ee.ee_info = info; + serr->ee.ee_data = 0; + serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw; + serr->port = port; + + skb->h.raw = payload; + skb_pull(skb, payload - skb->data); + + if (sock_queue_err_skb(sk, skb)) + kfree_skb(skb); +} + +void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info) +{ + struct sock_exterr_skb *serr; + struct iphdr *iph; + struct sk_buff *skb; + + if (!sk->ip_recverr) + return; + + skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); + if (!skb) + return; + + iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr)); + skb->nh.iph = iph; + iph->daddr = daddr; + + serr = SKB_EXT_ERR(skb); + serr->ee.ee_errno = err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_type = 0; + serr->ee.ee_code = 0; + serr->ee.ee_pad = 0; + serr->ee.ee_info = info; + serr->ee.ee_data = 0; + serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw; + serr->port = port; + + skb->h.raw = skb->tail; + skb_pull(skb, skb->tail - skb->data); + + if (sock_queue_err_skb(sk, skb)) + kfree_skb(skb); +} + +/* + * Handle MSG_ERRQUEUE + */ +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + struct sock_exterr_skb *serr; + struct sk_buff *skb, *skb2; + struct sockaddr_in *sin; + struct { + struct sock_extended_err ee; + struct sockaddr_in offender; + } errhdr; + int err; + int copied; + + err = -EAGAIN; + skb = skb_dequeue(&sk->error_queue); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = memcpy_toiovec(msg->msg_iov, skb->data, copied); + if (err) + goto out_free_skb; + + serr = SKB_EXT_ERR(skb); + + sin = (struct sockaddr_in *)msg->msg_name; + if (sin) { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = *(u32*)(skb->nh.raw + serr->addr_offset); + sin->sin_port = serr->port; + } + + memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); + sin = &errhdr.offender; + sin->sin_family = AF_UNSPEC; + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = skb->nh.iph->saddr; + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); + } + + put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); + + /* Now we could try to dump offended packet options */ + + msg->msg_flags |= MSG_ERRQUEUE; + err = copied; + + /* Reset and 
regenerate socket error */ + sk->err = 0; + if ((skb2 = skb_peek(&sk->error_queue)) != NULL) { + sk->err = SKB_EXT_ERR(skb2)->ee.ee_errno; + sk->error_report(sk); + } + +out_free_skb: + kfree_skb(skb); +out: + return err; +} + + +/* + * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on + * an IP socket. + * + * We implement IP_TOS (type of service), IP_TTL (time to live). + */ + +int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) +{ + int val=0,err; +#if defined(CONFIG_IP_FIREWALL) + char tmp_fw[MAX(sizeof(struct ip_fwtest),sizeof(struct ip_fwnew))]; +#endif + if(optlen>=sizeof(int)) { + if(get_user(val, (int *) optval)) + return -EFAULT; + } else if(optlen>=sizeof(char)) { + unsigned char ucval; + if(get_user(ucval, (unsigned char *) optval)) + return -EFAULT; + val = (int)ucval; + } + /* If optlen==0, it is equivalent to val == 0 */ + + if(level!=SOL_IP) + return -ENOPROTOOPT; +#ifdef CONFIG_IP_MROUTE + if(optname>=MRT_BASE && optname <=MRT_BASE+10) + { + return ip_mroute_setsockopt(sk,optname,optval,optlen); + } +#endif + + switch(optname) + { + case IP_OPTIONS: + { + struct ip_options * opt = NULL; + if (optlen > 40 || optlen < 0) + return -EINVAL; + err = ip_options_get(&opt, optval, optlen, 1); + if (err) + return err; + lock_sock(sk); + if (sk->type == SOCK_STREAM) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (sk->family == PF_INET || + ((tcp_connected(sk->state) || sk->state == TCP_SYN_SENT) + && sk->daddr != LOOPBACK4_IPV6)) { +#endif + if (opt) + tp->ext_header_len = opt->optlen; + tcp_sync_mss(sk, tp->pmtu_cookie); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } +#endif + } + opt = xchg(&sk->opt, opt); + release_sock(sk); + if (opt) + kfree_s(opt, sizeof(struct ip_options) + opt->optlen); + return 0; + } + case IP_PKTINFO: + if (val) + sk->ip_cmsg_flags |= IP_CMSG_PKTINFO; + else + sk->ip_cmsg_flags &= ~IP_CMSG_PKTINFO; + return 0; + case IP_RECVTTL: + if (val) + sk->ip_cmsg_flags |= IP_CMSG_TTL; + else + sk->ip_cmsg_flags &= ~IP_CMSG_TTL; + return 0; + case IP_RECVTOS: + if (val) + sk->ip_cmsg_flags |= IP_CMSG_TOS; + else + sk->ip_cmsg_flags &= ~IP_CMSG_TOS; + return 0; + case IP_RECVOPTS: + if (val) + sk->ip_cmsg_flags |= IP_CMSG_RECVOPTS; + else + sk->ip_cmsg_flags &= ~IP_CMSG_RECVOPTS; + return 0; + case IP_RETOPTS: + if (val) + sk->ip_cmsg_flags |= IP_CMSG_RETOPTS; + else + sk->ip_cmsg_flags &= ~IP_CMSG_RETOPTS; + return 0; + case IP_TOS: /* This sets both TOS and Precedence */ + /* Reject setting of unused bits */ + if (val & ~(IPTOS_TOS_MASK|IPTOS_PREC_MASK)) + return -EINVAL; + if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && + !capable(CAP_NET_ADMIN)) + return -EPERM; + if (sk->ip_tos != val) { + lock_sock(sk); + sk->ip_tos=val; + sk->priority = rt_tos2priority(val); + dst_release(xchg(&sk->dst_cache, NULL)); + release_sock(sk); + } + return 0; + case IP_TTL: + if (optlen<1) + return -EINVAL; + if(val==-1) + val = ip_statistics.IpDefaultTTL; + if(val<1||val>255) + return -EINVAL; + sk->ip_ttl=val; + return 0; + case IP_HDRINCL: + if(sk->type!=SOCK_RAW) + return -ENOPROTOOPT; + sk->ip_hdrincl=val?1:0; + return 0; + case IP_MTU_DISCOVER: + if (val<0 || val>2) + return -EINVAL; + sk->ip_pmtudisc = val; + return 0; + case IP_RECVERR: + sk->ip_recverr = !!val; + if (!val) + skb_queue_purge(&sk->error_queue); + return 0; + case IP_MULTICAST_TTL: + if (optlen<1) + return -EINVAL; + if (val==-1) + val = 1; + if (val < 0 || val > 
255) + return -EINVAL; + sk->ip_mc_ttl=val; + return 0; + case IP_MULTICAST_LOOP: + if (optlen<1) + return -EINVAL; + sk->ip_mc_loop = val ? 1 : 0; + return 0; + case IP_MULTICAST_IF: + { + struct ip_mreqn mreq; + struct device *dev = NULL; + + /* + * Check the arguments are allowable + */ + + if (optlen >= sizeof(struct ip_mreqn)) { + if (copy_from_user(&mreq,optval,sizeof(mreq))) + return -EFAULT; + } else { + memset(&mreq, 0, sizeof(mreq)); + if (optlen >= sizeof(struct in_addr) && + copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) + return -EFAULT; + } + + if (!mreq.imr_ifindex) { + if (mreq.imr_address.s_addr == INADDR_ANY) { + sk->ip_mc_index = 0; + sk->ip_mc_addr = 0; + return 0; + } + dev = ip_dev_find(mreq.imr_address.s_addr); + } else + dev = dev_get_by_index(mreq.imr_ifindex); + + if (!dev) + return -EADDRNOTAVAIL; + + if (sk->bound_dev_if && dev->ifindex != sk->bound_dev_if) + return -EINVAL; + + sk->ip_mc_index = mreq.imr_ifindex; + sk->ip_mc_addr = mreq.imr_address.s_addr; + return 0; + } + + case IP_ADD_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + { + struct ip_mreqn mreq; + + if (optlen < sizeof(struct ip_mreq)) + return -EINVAL; + if (optlen >= sizeof(struct ip_mreqn)) { + if(copy_from_user(&mreq,optval,sizeof(mreq))) + return -EFAULT; + } else { + memset(&mreq, 0, sizeof(mreq)); + if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) + return -EFAULT; + } + + if (optname == IP_ADD_MEMBERSHIP) + return ip_mc_join_group(sk,&mreq); + else + return ip_mc_leave_group(sk,&mreq); + } + case IP_ROUTER_ALERT: + return ip_ra_control(sk, val ? 1 : 0, NULL); + +#ifdef CONFIG_IP_FIREWALL + case IP_FW_MASQ_TIMEOUTS: + case IP_FW_APPEND: + case IP_FW_REPLACE: + case IP_FW_DELETE: + case IP_FW_DELETE_NUM: + case IP_FW_INSERT: + case IP_FW_FLUSH: + case IP_FW_ZERO: + case IP_FW_CHECK: + case IP_FW_CREATECHAIN: + case IP_FW_DELETECHAIN: + case IP_FW_POLICY: + if(!capable(CAP_NET_ADMIN)) + return -EACCES; + if(optlen>sizeof(tmp_fw) || optlen<1) + return -EINVAL; + if(copy_from_user(&tmp_fw,optval,optlen)) + return -EFAULT; + err=ip_fw_ctl(optname, &tmp_fw,optlen); + return -err; /* -0 is 0 after all */ +#endif /* CONFIG_IP_FIREWALL */ +#ifdef CONFIG_IP_MASQUERADE + case IP_FW_MASQ_CTL: + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + if(optlen<1) + return -EINVAL; + err=ip_masq_uctl(optname, optval ,optlen); + return err; + +#endif + default: + return(-ENOPROTOOPT); + } +} + +/* + * Get the options. Note for future reference. The GET of IP options gets the + * _received_ ones. The set sets the _sent_ ones. 
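For illustration (an editor's sketch, not part of the imported source), this is how the IP_RECVERR option and the MSG_ERRQUEUE path served by ip_recv_error() above are typically consumed from userspace; the helper name, buffer sizes and printed fields are assumptions made for the example:

```c
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <linux/errqueue.h>	/* struct sock_extended_err */

/* Drain one entry from the per-socket error queue that ip_icmp_error()
 * and ip_local_error() fill and ip_recv_error() hands back. */
static int read_one_icmp_error(int fd)
{
	char data[512], cbuf[512];
	struct iovec iov = { data, sizeof(data) };
	struct sockaddr_in from;
	struct msghdr msg;
	struct cmsghdr *cm;
	int n;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &from;
	msg.msg_namelen = sizeof(from);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	n = recvmsg(fd, &msg, MSG_ERRQUEUE);
	if (n < 0)
		return -1;			/* EAGAIN: the error queue is empty */

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) {
			struct sock_extended_err *ee =
				(struct sock_extended_err *) CMSG_DATA(cm);
			printf("err=%d origin=%d type=%d code=%d info=%u\n",
			       ee->ee_errno, ee->ee_origin,
			       ee->ee_type, ee->ee_code, ee->ee_info);
		}
	}
	return n;
}
```

Error queueing has to be enabled first with int on = 1; setsockopt(fd, SOL_IP, IP_RECVERR, &on, sizeof(on)); which lands in the IP_RECVERR case of ip_setsockopt() above.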
+ */ + +int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) +{ + int val; + int len; + + if(level!=SOL_IP) + return -EOPNOTSUPP; + +#ifdef CONFIG_IP_MROUTE + if(optname>=MRT_BASE && optname <=MRT_BASE+10) + { + return ip_mroute_getsockopt(sk,optname,optval,optlen); + } +#endif + + if(get_user(len,optlen)) + return -EFAULT; + + switch(optname) + { + case IP_OPTIONS: + { + unsigned char optbuf[sizeof(struct ip_options)+40]; + struct ip_options * opt = (struct ip_options*)optbuf; + lock_sock(sk); + opt->optlen = 0; + if (sk->opt) + memcpy(optbuf, sk->opt, sizeof(struct ip_options)+sk->opt->optlen); + release_sock(sk); + if (opt->optlen == 0) + return put_user(0, optlen); + + ip_options_undo(opt); + + len=min(len, opt->optlen); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval, opt->__data, len)) + return -EFAULT; + return 0; + } + case IP_PKTINFO: + val = (sk->ip_cmsg_flags & IP_CMSG_PKTINFO) != 0; + break; + case IP_RECVTTL: + val = (sk->ip_cmsg_flags & IP_CMSG_TTL) != 0; + break; + case IP_RECVTOS: + val = (sk->ip_cmsg_flags & IP_CMSG_TOS) != 0; + break; + case IP_RECVOPTS: + val = (sk->ip_cmsg_flags & IP_CMSG_RECVOPTS) != 0; + break; + case IP_RETOPTS: + val = (sk->ip_cmsg_flags & IP_CMSG_RETOPTS) != 0; + break; + case IP_TOS: + val=sk->ip_tos; + break; + case IP_TTL: + val=sk->ip_ttl; + break; + case IP_HDRINCL: + val=sk->ip_hdrincl; + break; + case IP_MTU_DISCOVER: + val=sk->ip_pmtudisc; + break; + case IP_MTU: + val = 0; + lock_sock(sk); + if (sk->dst_cache) + val = sk->dst_cache->pmtu; + release_sock(sk); + if (!val) + return -ENOTCONN; + break; + case IP_RECVERR: + val=sk->ip_recverr; + break; + case IP_MULTICAST_TTL: + val=sk->ip_mc_ttl; + break; + case IP_MULTICAST_LOOP: + val=sk->ip_mc_loop; + break; + case IP_MULTICAST_IF: + { + struct ip_mreqn mreq; + len = min(len,sizeof(struct ip_mreqn)); + if(put_user(len, optlen)) + return -EFAULT; + mreq.imr_ifindex = sk->ip_mc_index; + mreq.imr_address.s_addr = sk->ip_mc_addr; + mreq.imr_multiaddr.s_addr = 0; + if(copy_to_user((void *)optval, &mreq, len)) + return -EFAULT; + return 0; + } + default: + return(-ENOPROTOOPT); + } + + if (len < sizeof(int) && len > 0 && val>=0 && val<255) { + unsigned char ucval = (unsigned char)val; + len = 1; + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval,&ucval,1)) + return -EFAULT; + } else { + len=min(sizeof(int),len); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval,&val,len)) + return -EFAULT; + } + return 0; +} diff --git a/pfinet/linux-src/net/ipv4/ipconfig.c b/pfinet/linux-src/net/ipv4/ipconfig.c new file mode 100644 index 00000000..0770bad1 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ipconfig.c @@ -0,0 +1,970 @@ +/* + * $Id: ipconfig.c,v 1.20.2.1 1999/06/28 11:33:27 davem Exp $ + * + * Automatic Configuration of IP -- use BOOTP or RARP or user-supplied + * information to configure own IP address and routes. + * + * Copyright (C) 1996--1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * Derived from network configuration code in fs/nfs/nfsroot.c, + * originally Copyright (C) 1995, 1996 Gero Kuhlmann and me. + * + * BOOTP rewritten to construct and analyse packets itself instead + * of misusing the IP layer. 
num_bugs_causing_wrong_arp_replies--; + * -- MJ, December 1998 + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/utsname.h> +#include <linux/in.h> +#include <linux/if.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/socket.h> +#include <linux/route.h> +#include <linux/udp.h> +#include <net/arp.h> +#include <net/ip.h> +#include <net/ipconfig.h> + +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> + +/* Define this to allow debugging output */ +#undef IPCONFIG_DEBUG + +#ifdef IPCONFIG_DEBUG +#define DBG(x) printk x +#else +#define DBG(x) do { } while(0) +#endif + +/* Define the timeout for waiting for a RARP/BOOTP reply */ +#define CONF_BASE_TIMEOUT (HZ*5) /* Initial timeout: 5 seconds */ +#define CONF_RETRIES 10 /* 10 retries */ +#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ +#define CONF_TIMEOUT_MULT *5/4 /* Rate of timeout growth */ +#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */ + +/* IP configuration */ +static char user_dev_name[IFNAMSIZ] __initdata = { 0, };/* Name of user-selected boot device */ +u32 ic_myaddr __initdata = INADDR_NONE; /* My IP address */ +u32 ic_servaddr __initdata = INADDR_NONE; /* Server IP address */ +u32 ic_gateway __initdata = INADDR_NONE; /* Gateway IP address */ +u32 ic_netmask __initdata = INADDR_NONE; /* Netmask for local subnet */ +int ic_enable __initdata = 1; /* Automatic IP configuration enabled */ +int ic_host_name_set __initdata = 0; /* Host name configured manually */ +int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */ + +u32 root_server_addr __initdata = INADDR_NONE; /* Address of boot server */ +u8 root_server_path[256] __initdata = { 0, }; /* Path to mount as root */ + +#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP) + +#define CONFIG_IP_PNP_DYNAMIC + +static int ic_proto_enabled __initdata = 0 /* Protocols enabled */ +#ifdef CONFIG_IP_PNP_BOOTP + | IC_BOOTP +#endif +#ifdef CONFIG_IP_PNP_RARP + | IC_RARP +#endif + ; +static int ic_got_reply __initdata = 0; /* Protocol(s) we got reply from */ + +#else + +static int ic_proto_enabled __initdata = 0; + +#endif + +static int ic_proto_have_if __initdata = 0; + +/* + * Network devices + */ + +struct ic_device { + struct ic_device *next; + struct device *dev; + unsigned short flags; + int able; +}; + +static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ +static struct device *ic_dev __initdata = NULL; /* Selected device */ + +static int __init ic_open_devs(void) +{ + struct ic_device *d, **last; + struct device *dev; + unsigned short oflags; + + last = &ic_first_dev; + for (dev = dev_base; dev; dev = dev->next) + if (user_dev_name[0] ? 
!strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && + (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && + strncmp(dev->name, "dummy", 5))) { + int able = 0; + if (dev->mtu >= 364) + able |= IC_BOOTP; + else + printk(KERN_WARNING "BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu); + if (!(dev->flags & IFF_NOARP)) + able |= IC_RARP; + able &= ic_proto_enabled; + if (ic_proto_enabled && !able) + continue; + oflags = dev->flags; + if (dev_change_flags(dev, oflags | IFF_UP) < 0) { + printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + continue; + } + if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) + return -1; + d->dev = dev; + *last = d; + last = &d->next; + d->flags = oflags; + d->able = able; + ic_proto_have_if |= able; + DBG(("IP-Config: Opened %s (able=%d)\n", dev->name, able)); + } + *last = NULL; + + if (!ic_first_dev) { + if (user_dev_name[0]) + printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); + else + printk(KERN_ERR "IP-Config: No network devices available.\n"); + return -1; + } + return 0; +} + +static void __init ic_close_devs(void) +{ + struct ic_device *d, *next; + struct device *dev; + + next = ic_first_dev; + while ((d = next)) { + next = d->next; + dev = d->dev; + if (dev != ic_dev) { + DBG(("IP-Config: Downing %s\n", dev->name)); + dev_change_flags(dev, d->flags); + } + kfree_s(d, sizeof(struct ic_device)); + } +} + +/* + * Interface to various network functions. + */ + +static inline void +set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = devinet_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = ip_rt_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +/* + * Set up interface addresses and routes. + */ + +static int __init ic_setup_if(void) +{ + struct ifreq ir; + struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr; + int err; + + memset(&ir, 0, sizeof(ir)); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + set_sockaddr(sin, ic_myaddr, 0); + if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err); + return -1; + } + return 0; +} + +static int __init ic_setup_routes(void) +{ + /* No need to setup device routes, only the default route... 
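As a worked example for the interface setup above (the addresses are hypothetical): with ic_myaddr = 192.168.1.5 and ic_netmask = 255.255.255.0, ic_setup_if() derives the broadcast address as ic_myaddr | ~ic_netmask = 192.168.1.255. ic_setup_routes() below only accepts a gateway for which (ic_gateway ^ ic_myaddr) & ic_netmask == 0, so 192.168.1.1 is installed as the default route while 10.0.0.1 would be rejected as not directly connected. If no netmask was obtained at all, ic_defaults() below falls back to the classful rules: 10.1.2.3 gets 255.0.0.0, 172.16.5.9 gets 255.255.0.0, and 192.168.1.5 gets 255.255.255.0.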
*/ + + if (ic_gateway != INADDR_NONE) { + struct rtentry rm; + int err; + + memset(&rm, 0, sizeof(rm)); + if ((ic_gateway ^ ic_myaddr) & ic_netmask) { + printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n"); + return -1; + } + set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); + rm.rt_flags = RTF_UP | RTF_GATEWAY; + if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err); + return -1; + } + } + + return 0; +} + +/* + * Fill in default values for all missing parameters. + */ + +static int __init ic_defaults(void) +{ + /* + * At this point we have no userspace running so need not + * claim locks on system_utsname + */ + + if (!ic_host_name_set) + strcpy(system_utsname.nodename, in_ntoa(ic_myaddr)); + + if (root_server_addr == INADDR_NONE) + root_server_addr = ic_servaddr; + + if (ic_netmask == INADDR_NONE) { + if (IN_CLASSA(ntohl(ic_myaddr))) + ic_netmask = htonl(IN_CLASSA_NET); + else if (IN_CLASSB(ntohl(ic_myaddr))) + ic_netmask = htonl(IN_CLASSB_NET); + else if (IN_CLASSC(ntohl(ic_myaddr))) + ic_netmask = htonl(IN_CLASSC_NET); + else { + printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr); + return -1; + } + printk("IP-Config: Guessing netmask %s\n", in_ntoa(ic_netmask)); + } + + return 0; +} + +/* + * RARP support. + */ + +#ifdef CONFIG_IP_PNP_RARP + +static int ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt); + +static struct packet_type rarp_packet_type __initdata = { + __constant_htons(ETH_P_RARP), + NULL, /* Listen to all devices */ + ic_rarp_recv, + NULL, + NULL +}; + +static inline void ic_rarp_init(void) +{ + dev_add_pack(&rarp_packet_type); +} + +static inline void ic_rarp_cleanup(void) +{ + dev_remove_pack(&rarp_packet_type); +} + +/* + * Process received RARP packet. + */ +static int __init +ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct arphdr *rarp = (struct arphdr *)skb->h.raw; + unsigned char *rarp_ptr = (unsigned char *) (rarp + 1); + unsigned long sip, tip; + unsigned char *sha, *tha; /* s for "source", t for "target" */ + + /* If we already have a reply, just drop the packet */ + if (ic_got_reply) + goto drop; + + /* If this test doesn't pass, it's not IP, or we should ignore it anyway */ + if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd)) + goto drop; + + /* If it's not a RARP reply, delete it. */ + if (rarp->ar_op != htons(ARPOP_RREPLY)) + goto drop; + + /* If it's not Ethernet, delete it. */ + if (rarp->ar_pro != htons(ETH_P_IP)) + goto drop; + + /* Extract variable-width fields */ + sha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&sip, rarp_ptr, 4); + rarp_ptr += 4; + tha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&tip, rarp_ptr, 4); + + /* Discard packets which are not meant for us. */ + if (memcmp(tha, dev->dev_addr, dev->addr_len)) + goto drop; + + /* Discard packets which are not from specified server. */ + if (ic_servaddr != INADDR_NONE && ic_servaddr != sip) + goto drop; + + /* Victory! The packet is what we were looking for! */ + if (!ic_got_reply) { + ic_got_reply = IC_RARP; + ic_dev = dev; + if (ic_myaddr == INADDR_NONE) + ic_myaddr = tip; + ic_servaddr = sip; + } + + /* And throw the packet out... 
*/ +drop: + kfree_skb(skb); + return 0; +} + + +/* + * Send RARP request packet over all devices which allow RARP. + */ +static void __init ic_rarp_send(void) +{ + struct ic_device *d; + + for (d=ic_first_dev; d; d=d->next) + if (d->able & IC_RARP) { + struct device *dev = d->dev; + arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL, + dev->dev_addr, dev->dev_addr); + } +} + +#endif + +/* + * BOOTP support. + */ + +#ifdef CONFIG_IP_PNP_BOOTP + +struct bootp_pkt { /* BOOTP packet format */ + struct iphdr iph; /* IP header */ + struct udphdr udph; /* UDP header */ + u8 op; /* 1=request, 2=reply */ + u8 htype; /* HW address type */ + u8 hlen; /* HW address length */ + u8 hops; /* Used only by gateways */ + u32 xid; /* Transaction ID */ + u16 secs; /* Seconds since we started */ + u16 flags; /* Just what it says */ + u32 client_ip; /* Client's IP address if known */ + u32 your_ip; /* Assigned IP address */ + u32 server_ip; /* Server's IP address */ + u32 relay_ip; /* IP address of BOOTP relay */ + u8 hw_addr[16]; /* Client's HW address */ + u8 serv_name[64]; /* Server host name */ + u8 boot_file[128]; /* Name of boot file */ + u8 vendor_area[128]; /* Area for extensions */ +}; + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +static u32 ic_bootp_xid; + +static int ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt); + +static struct packet_type bootp_packet_type __initdata = { + __constant_htons(ETH_P_IP), + NULL, /* Listen to all devices */ + ic_bootp_recv, + NULL, + NULL +}; + + +/* + * Initialize BOOTP extension fields in the request. + */ +static void __init ic_bootp_init_ext(u8 *e) +{ + *e++ = 99; /* RFC1048 Magic Cookie */ + *e++ = 130; + *e++ = 83; + *e++ = 99; + *e++ = 1; /* Subnet mask request */ + *e++ = 4; + e += 4; + *e++ = 3; /* Default gateway request */ + *e++ = 4; + e += 4; + *e++ = 12; /* Host name request */ + *e++ = 32; + e += 32; + *e++ = 40; /* NIS Domain name request */ + *e++ = 32; + e += 32; + *e++ = 17; /* Boot path */ + *e++ = 32; + e += 32; + *e = 255; /* End of the list */ +} + + +/* + * Initialize the BOOTP mechanism. + */ +static inline void ic_bootp_init(void) +{ + get_random_bytes(&ic_bootp_xid, sizeof(u32)); + DBG(("BOOTP: XID=%08x\n", ic_bootp_xid)); + dev_add_pack(&bootp_packet_type); +} + + +/* + * BOOTP cleanup. + */ +static inline void ic_bootp_cleanup(void) +{ + dev_remove_pack(&bootp_packet_type); +} + + +/* + * Send BOOTP request to single interface. 
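For illustration, the RFC 1048 vendor area that ic_bootp_init_ext() above builds is a sequence of tag/length/value triples (the zeroed value bytes are placeholders the server fills in): 99 130 83 99 (magic cookie), 1 4 plus four bytes (subnet mask request), 3 4 plus four bytes (default gateway request), 12 32 plus 32 bytes (host name), 40 32 (NIS domain), 17 32 (boot path), terminated by a single 255. ic_do_bootp_ext() below walks the reply in the same tag/length/value form.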
+ */ +static void __init ic_bootp_send_if(struct ic_device *d, u32 jiffies) +{ + struct device *dev = d->dev; + struct sk_buff *skb; + struct bootp_pkt *b; + int hh_len = (dev->hard_header_len + 15) & ~15; + struct iphdr *h; + + /* Allocate packet */ + skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL); + if (!skb) + return; + skb_reserve(skb, hh_len); + b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); + memset(b, 0, sizeof(struct bootp_pkt)); + + /* Construct IP header */ + skb->nh.iph = h = &b->iph; + h->version = 4; + h->ihl = 5; + h->tot_len = htons(sizeof(struct bootp_pkt)); + h->frag_off = htons(IP_DF); + h->ttl = 64; + h->protocol = IPPROTO_UDP; + h->daddr = INADDR_BROADCAST; + h->check = ip_fast_csum((unsigned char *) h, h->ihl); + + /* Construct UDP header */ + b->udph.source = htons(68); + b->udph.dest = htons(67); + b->udph.len = htons(sizeof(struct bootp_pkt) - sizeof(struct iphdr)); + /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */ + + /* Construct BOOTP header */ + b->op = BOOTP_REQUEST; + b->htype = dev->type; + b->hlen = dev->addr_len; + memcpy(b->hw_addr, dev->dev_addr, dev->addr_len); + b->secs = htons(jiffies / HZ); + b->xid = ic_bootp_xid; + ic_bootp_init_ext(b->vendor_area); + + /* Chain packet down the line... */ + skb->dev = dev; + skb->protocol = __constant_htons(ETH_P_IP); + if ((dev->hard_header && + dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) || + dev_queue_xmit(skb) < 0) + printk("E"); +} + + +/* + * Send BOOTP requests to all interfaces. + */ +static void __init ic_bootp_send(u32 jiffies) +{ + struct ic_device *d; + + for(d=ic_first_dev; d; d=d->next) + if (d->able & IC_BOOTP) + ic_bootp_send_if(d, jiffies); +} + + +/* + * Copy BOOTP-supplied string if not already set. + */ +static int __init ic_bootp_string(char *dest, char *src, int len, int max) +{ + if (!len) + return 0; + if (len > max-1) + len = max-1; + strncpy(dest, src, len); + dest[len] = '\0'; + return 1; +} + + +/* + * Process BOOTP extension. + */ +static void __init ic_do_bootp_ext(u8 *ext) +{ +#ifdef IPCONFIG_DEBUG + u8 *c; + + printk("BOOTP: Got extension %02x",*ext); + for(c=ext+2; c<ext+2+ext[1]; c++) + printk(" %02x", *c); + printk("\n"); +#endif + + switch (*ext++) { + case 1: /* Subnet mask */ + if (ic_netmask == INADDR_NONE) + memcpy(&ic_netmask, ext+1, 4); + break; + case 3: /* Default gateway */ + if (ic_gateway == INADDR_NONE) + memcpy(&ic_gateway, ext+1, 4); + break; + case 12: /* Host name */ + ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN); + ic_host_name_set = 1; + break; + case 40: /* NIS Domain name */ + ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN); + break; + case 17: /* Root path */ + if (!root_server_path[0]) + ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); + break; + } +} + + +/* + * Receive BOOTP reply. 
+ */ +static int __init ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct bootp_pkt *b = (struct bootp_pkt *) skb->nh.iph; + struct iphdr *h = &b->iph; + int len; + + /* If we already have a reply, just drop the packet */ + if (ic_got_reply) + goto drop; + + /* Check whether it's a BOOTP packet */ + if (skb->pkt_type == PACKET_OTHERHOST || + skb->len < sizeof(struct udphdr) + sizeof(struct iphdr) || + h->ihl != 5 || + h->version != 4 || + ip_fast_csum((char *) h, h->ihl) != 0 || + skb->len < ntohs(h->tot_len) || + h->protocol != IPPROTO_UDP || + b->udph.source != htons(67) || + b->udph.dest != htons(68) || + ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr)) + goto drop; + + /* Fragments are not supported */ + if (h->frag_off & htons(IP_OFFSET|IP_MF)) { + printk(KERN_ERR "BOOTP: Ignoring fragmented reply.\n"); + goto drop; + } + + /* Is it a reply to our BOOTP request? */ + len = ntohs(b->udph.len) - sizeof(struct udphdr); + if (len < 300 || /* See RFC 951:2.1 */ + b->op != BOOTP_REPLY || + b->xid != ic_bootp_xid) { + printk("?"); + goto drop; + } + + /* Extract basic fields */ + ic_myaddr = b->your_ip; + ic_servaddr = b->server_ip; + ic_got_reply = IC_BOOTP; + ic_dev = dev; + + /* Parse extensions */ + if (b->vendor_area[0] == 99 && /* Check magic cookie */ + b->vendor_area[1] == 130 && + b->vendor_area[2] == 83 && + b->vendor_area[3] == 99) { + u8 *ext = &b->vendor_area[4]; + u8 *end = (u8 *) b + ntohs(b->iph.tot_len); + while (ext < end && *ext != 0xff) { + if (*ext == 0) /* Padding */ + ext++; + else { + u8 *opt = ext; + ext += ext[1] + 2; + if (ext <= end) + ic_do_bootp_ext(opt); + } + } + } + + if (ic_gateway == INADDR_NONE && b->relay_ip) + ic_gateway = b->relay_ip; + +drop: + kfree_skb(skb); + return 0; +} + + +#endif + + +/* + * Dynamic IP configuration -- BOOTP and RARP. + */ + +#ifdef CONFIG_IP_PNP_DYNAMIC + +static int __init ic_dynamic(void) +{ + int retries; + unsigned long timeout, jiff; + unsigned long start_jiffies; + int do_rarp = ic_proto_have_if & IC_RARP; + int do_bootp = ic_proto_have_if & IC_BOOTP; + + /* + * If neither BOOTP nor RARP was selected, return with an error. This + * routine gets only called when some pieces of information are mis- + * sing, and without BOOTP and RARP we are not able to get that in- + * formation. + */ + if (!ic_proto_enabled) { + printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + return -1; + } + +#ifdef CONFIG_IP_PNP_BOOTP + if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP) + printk(KERN_ERR "BOOTP: No suitable device found.\n"); +#endif + +#ifdef CONFIG_IP_PNP_RARP + if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP) + printk(KERN_ERR "RARP: No suitable device found.\n"); +#endif + + if (!ic_proto_have_if) + /* Error message already printed */ + return -1; + + /* + * Setup RARP and BOOTP protocols + */ +#ifdef CONFIG_IP_PNP_RARP + if (do_rarp) + ic_rarp_init(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (do_bootp) + ic_bootp_init(); +#endif + + /* + * Send requests and wait, until we get an answer. This loop + * seems to be a terrible waste of CPU time, but actually there is + * only one process running at all, so we don't need to use any + * scheduler functions. + * [Actually we could now, but the nothing else running note still + * applies.. - AC] + */ + printk(KERN_NOTICE "Sending %s%s%s requests...", + do_bootp ? "BOOTP" : "", + do_bootp && do_rarp ? " and " : "", + do_rarp ? 
"RARP" : ""); + start_jiffies = jiffies; + retries = CONF_RETRIES; + get_random_bytes(&timeout, sizeof(timeout)); + timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); + for(;;) { +#ifdef CONFIG_IP_PNP_BOOTP + if (do_bootp) + ic_bootp_send(jiffies - start_jiffies); +#endif +#ifdef CONFIG_IP_PNP_RARP + if (do_rarp) + ic_rarp_send(); +#endif + printk("."); + jiff = jiffies + timeout; + while (jiffies < jiff && !ic_got_reply) + ; + if (ic_got_reply) { + printk(" OK\n"); + break; + } + if (! --retries) { + printk(" timed out!\n"); + break; + } + timeout = timeout CONF_TIMEOUT_MULT; + if (timeout > CONF_TIMEOUT_MAX) + timeout = CONF_TIMEOUT_MAX; + } + +#ifdef CONFIG_IP_PNP_RARP + if (do_rarp) + ic_rarp_cleanup(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (do_bootp) + ic_bootp_cleanup(); +#endif + + if (!ic_got_reply) + return -1; + + printk("IP-Config: Got %s answer from %s, ", + (ic_got_reply & IC_BOOTP) ? "BOOTP" : "RARP", + in_ntoa(ic_servaddr)); + printk("my address is %s\n", in_ntoa(ic_myaddr)); + + return 0; +} + +#endif + +/* + * IP Autoconfig dispatcher. + */ + +int __init ip_auto_config(void) +{ + if (!ic_enable) + return 0; + + DBG(("IP-Config: Entered.\n")); + + /* Setup all network devices */ + if (ic_open_devs() < 0) + return -1; + + /* + * If the config information is insufficient (e.g., our IP address or + * IP address of the boot server is missing or we have multiple network + * interfaces and no default was set), use BOOTP or RARP to get the + * missing values. + */ + if (ic_myaddr == INADDR_NONE || +#ifdef CONFIG_ROOT_NFS + (root_server_addr == INADDR_NONE && ic_servaddr == INADDR_NONE) || +#endif + ic_first_dev->next) { +#ifdef CONFIG_IP_PNP_DYNAMIC + if (ic_dynamic() < 0) { + printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n"); + ic_close_devs(); + return -1; + } +#else + printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + ic_close_devs(); + return -1; +#endif + } else { + ic_dev = ic_first_dev->dev; /* Device selected manually or only one device -> use it */ + } + + /* + * Use defaults whereever applicable. + */ + if (ic_defaults() < 0) + return -1; + + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + ic_close_devs(); + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + return -1; + + DBG(("IP-Config: device=%s, local=%08x, server=%08x, boot=%08x, gw=%08x, mask=%08x\n", + ic_dev->name, ic_myaddr, ic_servaddr, root_server_addr, ic_gateway, ic_netmask)); + DBG(("IP-Config: host=%s, domain=%s, path=`%s'\n", system_utsname.nodename, + system_utsname.domainname, root_server_path)); + return 0; +} + +/* + * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel + * command line parameter. 
It consists of option fields separated by colons in + * the following order: + * + * <client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<bootp|rarp> + * + * Any of the fields can be empty which means to use a default value: + * <client-ip> - address given by BOOTP or RARP + * <server-ip> - address of host returning BOOTP or RARP packet + * <gw-ip> - none, or the address returned by BOOTP + * <netmask> - automatically determined from <client-ip>, or the + * one returned by BOOTP + * <host name> - <client-ip> in ASCII notation, or the name returned + * by BOOTP + * <device> - use all available devices + * <bootp|rarp|both|off> - use both protocols to determine my own address + */ +static int __init ic_proto_name(char *name) +{ + if (!strcmp(name, "off")) { + ic_proto_enabled = 0; + return 1; + } +#ifdef CONFIG_IP_PNP_BOOTP + else if (!strcmp(name, "bootp")) { + ic_proto_enabled &= ~IC_RARP; + return 1; + } +#endif +#ifdef CONFIG_IP_PNP_RARP + else if (!strcmp(name, "rarp")) { + ic_proto_enabled &= ~IC_BOOTP; + return 1; + } +#endif +#ifdef CONFIG_IP_PNP_DYNAMIC + else if (!strcmp(name, "both")) { + return 1; + } +#endif + return 0; +} + +void __init ip_auto_config_setup(char *addrs, int *ints) +{ + char *cp, *ip, *dp; + int num = 0; + + ic_set_manually = 1; + if (!strcmp(addrs, "off")) { + ic_enable = 0; + return; + } + if (ic_proto_name(addrs)) + return; + + /* Parse the whole string */ + ip = addrs; + while (ip && *ip) { + if ((cp = strchr(ip, ':'))) + *cp++ = '\0'; + if (strlen(ip) > 0) { + DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip)); + switch (num) { + case 0: + if ((ic_myaddr = in_aton(ip)) == INADDR_ANY) + ic_myaddr = INADDR_NONE; + break; + case 1: + if ((ic_servaddr = in_aton(ip)) == INADDR_ANY) + ic_servaddr = INADDR_NONE; + break; + case 2: + if ((ic_gateway = in_aton(ip)) == INADDR_ANY) + ic_gateway = INADDR_NONE; + break; + case 3: + if ((ic_netmask = in_aton(ip)) == INADDR_ANY) + ic_netmask = INADDR_NONE; + break; + case 4: + if ((dp = strchr(ip, '.'))) { + *dp++ = '\0'; + strncpy(system_utsname.domainname, dp, __NEW_UTS_LEN); + system_utsname.domainname[__NEW_UTS_LEN] = '\0'; + } + strncpy(system_utsname.nodename, ip, __NEW_UTS_LEN); + system_utsname.nodename[__NEW_UTS_LEN] = '\0'; + ic_host_name_set = 1; + break; + case 5: + strncpy(user_dev_name, ip, IFNAMSIZ); + user_dev_name[IFNAMSIZ-1] = '\0'; + break; + case 6: + ic_proto_name(ip); + break; + } + } + ip = cp; + num++; + } +} diff --git a/pfinet/linux-src/net/ipv4/ipip.c b/pfinet/linux-src/net/ipv4/ipip.c new file mode 100644 index 00000000..0aeef4a3 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ipip.c @@ -0,0 +1,870 @@ +/* + * Linux NET3: IP/IP protocol decoder. + * + * Version: $Id: ipip.c,v 1.26 1999/03/25 10:04:32 davem Exp $ + * + * Authors: + * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 + * + * Fixes: + * Alan Cox : Merged and made usable non modular (its so tiny its silly as + * a module taking up 2 pages). + * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph) + * to keep ip_forward happy. + * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). + * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL + * David Woodhouse : Perform some basic ICMP handling. + * IPIP Routing without decapsulation. + * Carlos Picoto : GRE over IP support + * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. + * I do not want to merge them together. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* tunnel.c: an IP tunnel driver + + The purpose of this driver is to provide an IP tunnel through + which you can tunnel network traffic transparently across subnets. + + This was written by looking at Nick Holloway's dummy driver + Thanks for the great code! + + -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 + + Minor tweaks: + Cleaned up the code a little and added some pre-1.3.0 tweaks. + dev->hard_header/hard_header_len changed to use no headers. + Comments/bracketing tweaked. + Made the tunnels use dev->name not tunnel: when error reporting. + Added tx_dropped stat + + -Alan Cox (Alan.Cox@linux.org) 21 March 95 + + Reworked: + Changed to tunnel to destination gateway in addition to the + tunnel's pointopoint address + Almost completely rewritten + Note: There is currently no firewall or ICMP handling done. + + -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 + +*/ + +/* Things I wish I had known when writing the tunnel driver: + + When the tunnel_xmit() function is called, the skb contains the + packet to be sent (plus a great deal of extra info), and dev + contains the tunnel device that _we_ are. + + When we are passed a packet, we are expected to fill in the + source address with our source IP address. + + What is the proper way to allocate, copy and free a buffer? + After you allocate it, it is a "0 length" chunk of memory + starting at zero. If you want to add headers to the buffer + later, you'll have to call "skb_reserve(skb, amount)" with + the amount of memory you want reserved. Then, you call + "skb_put(skb, amount)" with the amount of space you want in + the buffer. skb_put() returns a pointer to the top (#0) of + that buffer. skb->len is set to the amount of space you have + "allocated" with skb_put(). You can then write up to skb->len + bytes to that buffer. If you need more, you can call skb_put() + again with the additional amount of space you need. You can + find out how much more space you can allocate by calling + "skb_tailroom(skb)". + Now, to add header space, call "skb_push(skb, header_len)". + This creates space at the beginning of the buffer and returns + a pointer to this new space. If later you need to strip a + header from a buffer, call "skb_pull(skb, header_len)". + skb_headroom() will return how much space is left at the top + of the buffer (before the main data). Remember, this headroom + space must be reserved before the skb_put() function is called. 
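As a compact illustration of the allocation pattern described above (an editor's sketch, not part of the imported source; the function name and sizes are made up), this is the usual alloc_skb/skb_reserve/skb_put/skb_push sequence, the same one ic_bootp_send_if() in ipconfig.c and ipip_tunnel_xmit() below follow:

```c
#include <linux/mm.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/string.h>

static struct sk_buff *build_packet(struct device *dev, int payload_len)
{
	int hh_len = (dev->hard_header_len + 15) & ~15;	/* room for the link-layer header */
	struct sk_buff *skb;
	unsigned char *data;
	struct iphdr *iph;

	skb = alloc_skb(hh_len + sizeof(struct iphdr) + payload_len, GFP_ATOMIC);
	if (!skb)
		return NULL;

	/* Reserve headroom first; nothing may be written there yet. */
	skb_reserve(skb, hh_len + sizeof(struct iphdr));

	/* Claim space for the payload; skb->len grows by payload_len. */
	data = skb_put(skb, payload_len);
	memset(data, 0, payload_len);

	/* Prepend the IP header into part of the reserved headroom. */
	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr));
	skb->nh.raw = (unsigned char *) iph;

	/* skb_headroom(skb) == hh_len here, so the driver can still
	 * push its hardware header in front of the IP header. */
	return skb;
}
```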
+ */ + +/* + This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c + + For comments look at net/ipv4/ip_gre.c --ANK + */ + + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <asm/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ipip.h> + +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) + +static int ipip_fb_tunnel_init(struct device *dev); +static int ipip_tunnel_init(struct device *dev); + +static struct device ipip_fb_tunnel_dev = { + NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init, +}; + +static struct ip_tunnel ipip_fb_tunnel = { + NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl0", } +}; + +static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_r[HASH_SIZE]; +static struct ip_tunnel *tunnels_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_wc[1]; +static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; + +static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) +{ + unsigned h0 = HASH(remote); + unsigned h1 = HASH(local); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_r[h0]; t; t = t->next) { + if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) + return t; + } + if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + return t; + return NULL; +} + +static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) +{ + u32 remote = t->parms.iph.daddr; + u32 local = t->parms.iph.saddr; + unsigned h = 0; + int prio = 0; + + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + return &tunnels[prio][h]; +} + + +static void ipip_tunnel_unlink(struct ip_tunnel *t) +{ + struct ip_tunnel **tp; + + for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { + if (t == *tp) { + *tp = t->next; + synchronize_bh(); + break; + } + } +} + +static void ipip_tunnel_link(struct ip_tunnel *t) +{ + struct ip_tunnel **tp = ipip_bucket(t); + + t->next = *tp; + wmb(); + *tp = t; +} + +struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) +{ + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = 0; + int prio = 0; + + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) + return t; + } + if (!create) + return NULL; + + MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipip_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; 
i<100; i++) { + sprintf(dev->name, "tunl%d", i); + if (dev_get(dev->name) == NULL) + break; + } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); + } + if (register_netdevice(dev) < 0) + goto failed; + + ipip_tunnel_link(nt); + /* Do not decrement MOD_USE_COUNT here. */ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + + +static void ipip_tunnel_destroy(struct device *dev) +{ + if (dev == &ipip_fb_tunnel_dev) { + tunnels_wc[0] = NULL; + synchronize_bh(); + } else { + ipip_tunnel_unlink((struct ip_tunnel*)dev->priv); + kfree(dev); + MOD_DEC_USE_COUNT; + } +} + +void ipip_err(struct sk_buff *skb, unsigned char *dp, int len) +{ +#ifndef I_WISH_WORLD_WERE_PERFECT + +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. + */ + struct iphdr *iph = (struct iphdr*)dp; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; + + if (len < sizeof(struct iphdr)) + return; + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. --ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + t = ipip_tunnel_lookup(iph->daddr, iph->saddr); + if (t == NULL || t->parms.iph.daddr == 0) + return; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; + + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else + struct iphdr *iph = (struct iphdr*)dp; + int hlen = iph->ihl<<2; + struct iphdr *eiph; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + struct sk_buff *skb2; + struct rtable *rt; + + if (len < hlen + sizeof(struct iphdr)) + return; + eiph = (struct iphdr*)(dp + hlen); + + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < hlen) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? + */ + rel_type = ICMP_PARAMETERPROB; + rel_info = skb->h.icmph->un.gateway - hlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* And it is the only really necesary thing :-) */ + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < hlen+68) + return; + rel_info -= hlen; + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ + if (rel_info > ntohs(eiph->tot_len)) + return; + break; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. 
--ANK + */ + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + /* Prepare fake skb to feed it to icmp_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)eiph); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + kfree_skb(skb2); + return; + } + skb2->dev = rt->u.dst.dev; + + /* route "incoming" packet */ + if (rt->rt_flags&RTCF_LOCAL) { + ip_rt_put(rt); + rt = NULL; + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + rt->u.dst.dev->type != ARPHRD_IPGRE) { + ip_rt_put(rt); + kfree_skb(skb2); + return; + } + } else { + ip_rt_put(rt); + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + skb2->dst->dev->type != ARPHRD_IPGRE) { + kfree_skb(skb2); + return; + } + } + + /* change mtu on this route */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + if (rel_info > skb2->dst->pmtu) { + kfree_skb(skb2); + return; + } + skb2->dst->pmtu = rel_info; + rel_info = htonl(rel_info); + } else if (type == ICMP_TIME_EXCEEDED) { + struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + if (t->parms.iph.ttl) { + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + } + } + + icmp_send(skb2, rel_type, rel_code, rel_info); + kfree_skb(skb2); + return; +#endif +} + +int ipip_rcv(struct sk_buff *skb, unsigned short len) +{ + struct iphdr *iph; + struct ip_tunnel *tunnel; + + iph = skb->nh.iph; + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data); + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + + if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return 0; + } + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + kfree_skb(skb); + return 0; +} + +/* + * This function assumes it is being called from dev_queue_xmit() + * and that skb is filled properly by that function. 
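Before the transmit path, a brief aside on how a tunnel like this is created in the first place (an editor's sketch, not part of the imported source; the tunnel name and addresses are made up): userspace issues SIOCADDTUNNEL against the fallback device "tunl0", which ends up in ipip_tunnel_ioctl() further down in this file:

```c
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>	/* struct ip_tunnel_parm, SIOCADDTUNNEL */

/* fd is any AF_INET socket; local/remote are dotted-quad endpoint addresses. */
static int add_ipip_tunnel(int fd, const char *local, const char *remote)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;

	memset(&p, 0, sizeof(p));
	strcpy(p.name, "tunl1");		/* hypothetical tunnel name */
	p.iph.version  = 4;
	p.iph.ihl      = 5;
	p.iph.protocol = IPPROTO_IPIP;
	p.iph.ttl      = 64;			/* non-zero TTL: the handler also sets DF */
	p.iph.saddr    = inet_addr(local);
	p.iph.daddr    = inet_addr(remote);

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "tunl0");		/* requests go through the fallback device */
	ifr.ifr_data = (void *) &p;

	return ioctl(fd, SIOCADDTUNNEL, &ifr);
}
```

The call needs CAP_NET_ADMIN, mirroring the capable() check in the ioctl handler, and the parameters must pass the same version/protocol/ihl validation shown there.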
+ */ + +static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *tiph = &tunnel->parms.iph; + u8 tos = tunnel->parms.iph.tos; + u16 df = tiph->frag_off; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *old_iph = skb->nh.iph; + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + u32 dst = tiph->daddr; + int mtu; + + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } + + if (skb->protocol != __constant_htons(ETH_P_IP)) + goto tx_error; + + if (tos&1) + tos = old_iph->tos; + + if (!dst) { + /* NBMA tunnel */ + if ((rt = (struct rtable*)skb->dst) == NULL) { + tunnel->stat.tx_fifo_errors++; + goto tx_error; + } + if ((dst = rt->rt_gateway) == 0) + goto tx_error_icmp; + } + + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error_icmp; + } + tdev = rt->u.dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } + + mtu = rt->u.dst.pmtu - sizeof(struct iphdr); + if (mtu < 68) { + tunnel->stat.collisions++; + ip_rt_put(rt); + goto tx_error; + } + if (skb->dst && mtu < skb->dst->pmtu) + skb->dst->pmtu = mtu; + + df |= (old_iph->frag_off&__constant_htons(IP_DF)); + + if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + skb->h.raw = skb->nh.raw; + + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + stats->tx_dropped++; + dev_kfree_skb(skb); + tunnel->recursion--; + return 0; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + dev_kfree_skb(skb); + skb = new_skb; + } + + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. 
+ */ + + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr)>>2; + iph->frag_off = df; + iph->protocol = IPPROTO_IPIP; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + + if ((iph->ttl = tiph->ttl) == 0) + iph->ttl = old_iph->ttl; + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; + return 0; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + stats->tx_errors++; + dev_kfree_skb(skb); + tunnel->recursion--; + return 0; +} + +static int +ipip_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipip_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + t = ipip_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + + if (dev != &ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL && + t != &ipip_fb_tunnel) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || + (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { + err = -EINVAL; + break; + } + t = (struct ip_tunnel*)dev->priv; + start_bh_atomic(); + ipip_tunnel_unlink(t); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipip_tunnel_link(t); + end_bh_atomic(); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == &ipip_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipip_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipip_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipip_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipip_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void ipip_tunnel_init_gen(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipip_tunnel_destroy; + dev->hard_start_xmit = ipip_tunnel_xmit; + dev->get_stats = ipip_tunnel_get_stats; + dev->do_ioctl = ipip_tunnel_ioctl; + dev->change_mtu = ipip_tunnel_change_mtu; + + dev_init_buffers(dev); + + dev->type = ARPHRD_TUNNEL; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); + dev->mtu = 1500 - sizeof(struct iphdr); + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipip_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = &tunnel->parms.iph; + + ipip_tunnel_init_gen(dev); + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); + dev->mtu = tdev->mtu - sizeof(struct iphdr); + } + dev->iflink = tunnel->parms.link; + + return 0; +} + +#ifdef MODULE +static int ipip_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipip_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int ipip_fb_tunnel_init(struct device *dev)) +{ + struct iphdr *iph; + + ipip_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipip_fb_tunnel_open; + dev->stop = ipip_fb_tunnel_close; +#endif + + iph = &ipip_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_IPIP; + iph->ihl = 5; + + tunnels_wc[0] = &ipip_fb_tunnel; + return 0; +} + +static struct inet_protocol ipip_protocol = { + ipip_rcv, /* IPIP handler */ + ipip_err, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_IPIP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IPIP" /* name */ +}; + +#ifdef MODULE +int init_module(void) +#else +__initfunc(int ipip_init(void)) +#endif +{ + printk(KERN_INFO "IPv4 over IPv4 tunneling driver\n"); + + ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel; + ipip_fb_tunnel_dev.name = ipip_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipip_fb_tunnel_dev); +#else + register_netdevice(&ipip_fb_tunnel_dev); +#endif + + inet_add_protocol(&ipip_protocol); + return 0; +} + +#ifdef MODULE + +void cleanup_module(void) +{ + if ( inet_del_protocol(&ipip_protocol) < 0 ) + printk(KERN_INFO 
"ipip close: can't remove protocol\n"); + + unregister_netdevice(&ipip_fb_tunnel_dev); +} + +#endif diff --git a/pfinet/linux-src/net/ipv4/ipmr.c b/pfinet/linux-src/net/ipv4/ipmr.c new file mode 100644 index 00000000..cd51cd9a --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ipmr.c @@ -0,0 +1,1609 @@ +/* + * IP multicast routing support for mrouted 3.6/3.8 + * + * (c) 1995 Alan Cox, <alan@redhat.com> + * Linux Consultancy and Custom Driver Development + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version: $Id: ipmr.c,v 1.40.2.2 1999/06/20 21:27:44 davem Exp $ + * + * Fixes: + * Michael Chastain : Incorrect size of copying. + * Alan Cox : Added the cache manager code + * Alan Cox : Fixed the clone/copy bug and device race. + * Mike McLagan : Routing by source + * Malcolm Beattie : Buffer handling fixes. + * Alexey Kuznetsov : Double buffer free and other fixes. + * SVR Anand : Fixed several multicast bugs and problems. + * Alexey Kuznetsov : Status, optimisations and more. + * Brad Parker : Better behaviour on mrouted upcall + * overflow. + * Carlos Picoto : PIMv1 Support + * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header + * Relax this requrement to work with older peers. + * + */ + +#include <linux/config.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/proc_fs.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/raw.h> +#include <linux/notifier.h> +#include <linux/if_arp.h> +#include <linux/ip_fw.h> +#include <linux/firewall.h> +#include <net/ipip.h> +#include <net/checksum.h> + +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) +#define CONFIG_IP_PIMSM 1 +#endif + +/* + * Multicast router control variables + */ + +static struct vif_device vif_table[MAXVIFS]; /* Devices */ +static unsigned long vifc_map; /* Active device map */ +static int maxvif; +int mroute_do_assert = 0; /* Set in PIM assert */ +int mroute_do_pim = 0; +static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ +int cache_resolve_queue_len = 0; /* Size of unresolved */ + +static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); +static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); + +extern struct inet_protocol pim_protocol; + +static +struct device *ipmr_new_tunnel(struct vifctl *v) +{ + struct device *dev = NULL; + + rtnl_lock(); + dev = dev_get("tunl0"); + + if (dev) { + int err; + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + struct in_device *in_dev; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, 
"dvmrp%d", v->vifc_vifi); + ifr.ifr_ifru.ifru_data = (void*)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + + if (err == 0 && (dev = dev_get(p.name)) != NULL) { + dev->flags |= IFF_MULTICAST; + + in_dev = dev->ip_ptr; + if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) + goto failure; + in_dev->cnf.rp_filter = 0; + + if (dev_open(dev)) + goto failure; + } + } + rtnl_unlock(); + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + return NULL; +} + +#ifdef CONFIG_IP_PIMSM + +static int reg_vif_num = -1; +static struct device * reg_dev; + +static int reg_vif_xmit(struct sk_buff *skb, struct device *dev) +{ + ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; + ((struct net_device_stats*)dev->priv)->tx_packets++; + ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); + kfree_skb(skb); + return 0; +} + +static struct net_device_stats *reg_vif_get_stats(struct device *dev) +{ + return (struct net_device_stats*)dev->priv; +} + +static +struct device *ipmr_reg_vif(struct vifctl *v) +{ + struct device *dev; + struct in_device *in_dev; + int size; + + size = sizeof(*dev) + IFNAMSIZ + sizeof(struct net_device_stats); + dev = kmalloc(size, GFP_KERNEL); + if (!dev) + return NULL; + + memset(dev, 0, size); + + dev->priv = dev + 1; + dev->name = dev->priv + sizeof(struct net_device_stats); + + strcpy(dev->name, "pimreg"); + + dev->type = ARPHRD_PIMREG; + dev->mtu = 1500 - sizeof(struct iphdr) - 8; + dev->flags = IFF_NOARP; + dev->hard_start_xmit = reg_vif_xmit; + dev->get_stats = reg_vif_get_stats; + + rtnl_lock(); + + if (register_netdevice(dev)) { + rtnl_unlock(); + kfree(dev); + return NULL; + } + dev->iflink = 0; + + if ((in_dev = inetdev_init(dev)) == NULL) + goto failure; + + in_dev->cnf.rp_filter = 0; + + if (dev_open(dev)) + goto failure; + + rtnl_unlock(); + reg_dev = dev; + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + kfree(dev); + return NULL; +} +#endif + +/* + * Delete a VIF entry + */ + +static int vif_delete(int vifi) +{ + struct vif_device *v; + struct device *dev; + struct in_device *in_dev; + + if (vifi < 0 || vifi >= maxvif || !(vifc_map&(1<<vifi))) + return -EADDRNOTAVAIL; + + v = &vif_table[vifi]; + + dev = v->dev; + v->dev = NULL; + vifc_map &= ~(1<<vifi); + + if ((in_dev = dev->ip_ptr) != NULL) + in_dev->cnf.mc_forwarding = 0; + + dev_set_allmulti(dev, -1); + ip_rt_multicast_event(in_dev); + + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) { +#ifdef CONFIG_IP_PIMSM + if (vifi == reg_vif_num) { + reg_vif_num = -1; + reg_dev = NULL; + } +#endif + unregister_netdevice(dev); + if (v->flags&VIFF_REGISTER) + kfree(dev); + } + + if (vifi+1 == maxvif) { + int tmp; + for (tmp=vifi-1; tmp>=0; tmp--) { + if (vifc_map&(1<<tmp)) + break; + } + maxvif = tmp+1; + } + return 0; +} + +static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) +{ + int vifi; + + start_bh_atomic(); + + cache->mfc_minvif = MAXVIFS; + cache->mfc_maxvif = 0; + memset(cache->mfc_ttls, 255, MAXVIFS); + + for (vifi=0; vifi<maxvif; vifi++) { + if (vifc_map&(1<<vifi) && ttls[vifi] && ttls[vifi] < 255) { + cache->mfc_ttls[vifi] = ttls[vifi]; + if (cache->mfc_minvif > vifi) + cache->mfc_minvif = vifi; + if (cache->mfc_maxvif <= vifi) + cache->mfc_maxvif = vifi + 1; + } + } + end_bh_atomic(); +} + +/* + * Delete a multicast route cache entry + */ + +static void ipmr_cache_delete(struct mfc_cache *cache) +{ + struct sk_buff *skb; + int line; + struct mfc_cache **cp; + + /* + * 
Find the right cache line + */ + + line=MFC_HASH(cache->mfc_mcastgrp,cache->mfc_origin); + cp=&(mfc_cache_array[line]); + + if(cache->mfc_flags&MFC_QUEUED) + del_timer(&cache->mfc_timer); + + /* + * Unlink the buffer + */ + + while(*cp!=NULL) + { + if(*cp==cache) + { + *cp=cache->next; + break; + } + cp=&((*cp)->next); + } + + /* + * Free the buffer. If it is a pending resolution + * clean up the other resources. + */ + + if(cache->mfc_flags&MFC_QUEUED) + { + cache_resolve_queue_len--; + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + } else +#endif + kfree_skb(skb); + } + } + kfree_s(cache,sizeof(cache)); +} + +/* + * Cache expiry timer + */ + +static void ipmr_cache_timer(unsigned long data) +{ + struct mfc_cache *cache=(struct mfc_cache *)data; + ipmr_cache_delete(cache); +} + +/* + * Insert a multicast cache entry + */ + +static void ipmr_cache_insert(struct mfc_cache *c) +{ + int line=MFC_HASH(c->mfc_mcastgrp,c->mfc_origin); + c->next=mfc_cache_array[line]; + mfc_cache_array[line]=c; +} + +/* + * Find a multicast cache entry + */ + +struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) +{ + int line=MFC_HASH(mcastgrp,origin); + struct mfc_cache *cache; + + cache=mfc_cache_array[line]; + while(cache!=NULL) + { + if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp) + return cache; + cache=cache->next; + } + return NULL; +} + +/* + * Allocate a multicast cache entry + */ + +static struct mfc_cache *ipmr_cache_alloc(int priority) +{ + struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority); + if(c==NULL) + return NULL; + memset(c, 0, sizeof(*c)); + skb_queue_head_init(&c->mfc_unresolved); + init_timer(&c->mfc_timer); + c->mfc_timer.data=(long)c; + c->mfc_timer.function=ipmr_cache_timer; + c->mfc_minvif = MAXVIFS; + return c; +} + +/* + * A cache entry has gone into a resolved state from queued + */ + +static void ipmr_cache_resolve(struct mfc_cache *cache) +{ + struct sk_buff *skb; + + start_bh_atomic(); + + /* + * Kill the queue entry timer. + */ + + del_timer(&cache->mfc_timer); + + if (cache->mfc_flags&MFC_QUEUED) { + cache->mfc_flags&=~MFC_QUEUED; + cache_resolve_queue_len--; + } + + end_bh_atomic(); + + /* + * Play the pending entries through our router + */ + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + + if (ipmr_fill_mroute(skb, cache, NLMSG_DATA(nlh)) > 0) { + nlh->nlmsg_len = skb->tail - (u8*)nlh; + } else { + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; + } + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + } else +#endif + ip_mr_forward(skb, cache, 0); + } +} + +/* + * Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme.. 
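+ *
+ * (The scheme, as implemented below: for IGMPMSG_NOCACHE/WRONGVIF the
+ * kernel builds a fake IGMP packet - the original IP header with
+ * iph->protocol forced to 0, overlaid by struct igmpmsg, plus an IGMP
+ * header whose type is the upcall code - and queues it on mroute_socket
+ * with sock_queue_rcv_skb(); IGMPMSG_WHOLEPKT upcalls instead carry the
+ * complete packet behind a prepended pseudo IP header.)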
+ */ + +static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) +{ + struct sk_buff *skb; + int ihl = pkt->nh.iph->ihl<<2; + struct igmphdr *igmp; + struct igmpmsg *msg; + int ret; + + if (mroute_socket==NULL) + return -EINVAL; + +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) + skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); + else +#endif + skb = alloc_skb(128, GFP_ATOMIC); + + if(!skb) + return -ENOBUFS; + +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) { + /* Ugly, but we have no choice with this interface. + Duplicate old header, fix ihl, length etc. + And all this only to mangle msg->im_msgtype and + to set msg->im_mbz to "mbz" :-) + */ + msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); + skb->nh.raw = skb->h.raw = (u8*)msg; + memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); + msg->im_msgtype = IGMPMSG_WHOLEPKT; + msg->im_mbz = 0; + msg->im_vif = reg_vif_num; + skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; + skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); + } else +#endif + { + + /* + * Copy the IP header + */ + + skb->nh.iph = (struct iphdr *)skb_put(skb, ihl); + memcpy(skb->data,pkt->data,ihl); + skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ + msg = (struct igmpmsg*)skb->nh.iph; + msg->im_vif = vifi; + skb->dst = dst_clone(pkt->dst); + + /* + * Add our header + */ + + igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); + igmp->type = + msg->im_msgtype = assert; + igmp->code = 0; + skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ + skb->h.raw = skb->nh.raw; + } + + /* + * Deliver to mrouted + */ + if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { + if (net_ratelimit()) + printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); + kfree_skb(skb); + } + + return ret; +} + +/* + * Queue a packet for resolution + */ + +static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb) +{ + if(cache==NULL) + { + /* + * Create a new entry if allowable + */ + if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL) + { + kfree_skb(skb); + return -ENOBUFS; + } + /* + * Fill in the new cache entry + */ + cache->mfc_parent=ALL_VIFS; + cache->mfc_origin=skb->nh.iph->saddr; + cache->mfc_mcastgrp=skb->nh.iph->daddr; + cache->mfc_flags=MFC_QUEUED; + /* + * Link to the unresolved list + */ + ipmr_cache_insert(cache); + cache_resolve_queue_len++; + /* + * Fire off the expiry timer + */ + cache->mfc_timer.expires=jiffies+10*HZ; + add_timer(&cache->mfc_timer); + /* + * Reflect first query at mrouted. + */ + if(mroute_socket) + { + /* If the report failed throw the cache entry + out - Brad Parker + + OK, OK, Brad. 
Only do not forget to free skb + and return :-) --ANK + */ + if (ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)<0) { + ipmr_cache_delete(cache); + kfree_skb(skb); + return -ENOBUFS; + } + } + } + /* + * See if we can append the packet + */ + if(cache->mfc_queuelen>3) + { + kfree_skb(skb); + return -ENOBUFS; + } + cache->mfc_queuelen++; + skb_queue_tail(&cache->mfc_unresolved,skb); + return 0; +} + +/* + * MFC cache manipulation by user space mroute daemon + */ + +int ipmr_mfc_modify(int action, struct mfcctl *mfc) +{ + struct mfc_cache *cache; + + if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) + return -EINVAL; + /* + * Find the cache line + */ + + start_bh_atomic(); + + cache=ipmr_cache_find(mfc->mfcc_origin.s_addr,mfc->mfcc_mcastgrp.s_addr); + + /* + * Delete an entry + */ + if(action==MRT_DEL_MFC) + { + if(cache) + { + ipmr_cache_delete(cache); + end_bh_atomic(); + return 0; + } + end_bh_atomic(); + return -ENOENT; + } + if(cache) + { + + /* + * Update the cache, see if it frees a pending queue + */ + + cache->mfc_flags|=MFC_RESOLVED; + cache->mfc_parent=mfc->mfcc_parent; + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); + + /* + * Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. + */ + + if(cache->mfc_flags&MFC_QUEUED) + ipmr_cache_resolve(cache); /* Unhook & send the frames */ + end_bh_atomic(); + return 0; + } + + /* + * Unsolicited update - that's ok, add anyway. + */ + + + cache=ipmr_cache_alloc(GFP_ATOMIC); + if(cache==NULL) + { + end_bh_atomic(); + return -ENOMEM; + } + cache->mfc_flags=MFC_RESOLVED; + cache->mfc_origin=mfc->mfcc_origin.s_addr; + cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; + cache->mfc_parent=mfc->mfcc_parent; + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); + ipmr_cache_insert(cache); + end_bh_atomic(); + return 0; +} + +static void mrtsock_destruct(struct sock *sk) +{ + if (sk == mroute_socket) { + ipv4_devconf.mc_forwarding = 0; + + mroute_socket=NULL; + synchronize_bh(); + + mroute_close(sk); + } +} + +/* + * Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. + */ + +int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) +{ + struct vifctl vif; + struct mfcctl mfc; + + if(optname!=MRT_INIT) + { + if(sk!=mroute_socket) + return -EACCES; + } + + switch(optname) + { + case MRT_INIT: + if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP) + return -EOPNOTSUPP; + if(optlen!=sizeof(int)) + return -ENOPROTOOPT; + { + int opt; + if (get_user(opt,(int *)optval)) + return -EFAULT; + if (opt != 1) + return -ENOPROTOOPT; + } + if(mroute_socket) + return -EADDRINUSE; + mroute_socket=sk; + ipv4_devconf.mc_forwarding = 1; + if (ip_ra_control(sk, 1, mrtsock_destruct) == 0) + return 0; + mrtsock_destruct(sk); + return -EADDRINUSE; + case MRT_DONE: + return ip_ra_control(sk, 0, NULL); + case MRT_ADD_VIF: + case MRT_DEL_VIF: + if(optlen!=sizeof(vif)) + return -EINVAL; + if (copy_from_user(&vif,optval,sizeof(vif))) + return -EFAULT; + if(vif.vifc_vifi >= MAXVIFS) + return -ENFILE; + if(optname==MRT_ADD_VIF) + { + struct vif_device *v=&vif_table[vif.vifc_vifi]; + struct device *dev; + struct in_device *in_dev; + + /* Is vif busy ? 
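+			   (vifc_map is a plain bitmask, one bit per VIF
+			   slot, so a set bit here means the slot is in use)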
*/ + if (vifc_map&(1<<vif.vifc_vifi)) + return -EADDRINUSE; + + switch (vif.vifc_flags) { +#ifdef CONFIG_IP_PIMSM + case VIFF_REGISTER: + + /* + * Special Purpose VIF in PIM + * All the packets will be sent to the daemon + */ + if (reg_vif_num >= 0) + return -EADDRINUSE; + reg_vif_num = vif.vifc_vifi; + dev = ipmr_reg_vif(&vif); + if (!dev) { + reg_vif_num = -1; + return -ENOBUFS; + } + break; +#endif + case VIFF_TUNNEL: + dev = ipmr_new_tunnel(&vif); + if (!dev) + return -ENOBUFS; + break; + case 0: + dev=ip_dev_find(vif.vifc_lcl_addr.s_addr); + if (!dev) + return -EADDRNOTAVAIL; + break; + default: +#if 0 + printk(KERN_DEBUG "ipmr_add_vif: flags %02x\n", vif.vifc_flags); +#endif + return -EINVAL; + } + + if ((in_dev = dev->ip_ptr) == NULL) + return -EADDRNOTAVAIL; + if (in_dev->cnf.mc_forwarding) + return -EADDRINUSE; + in_dev->cnf.mc_forwarding = 1; + dev_set_allmulti(dev, +1); + ip_rt_multicast_event(in_dev); + + /* + * Fill in the VIF structures + */ + start_bh_atomic(); + v->rate_limit=vif.vifc_rate_limit; + v->local=vif.vifc_lcl_addr.s_addr; + v->remote=vif.vifc_rmt_addr.s_addr; + v->flags=vif.vifc_flags; + v->threshold=vif.vifc_threshold; + v->dev=dev; + v->bytes_in = 0; + v->bytes_out = 0; + v->pkt_in = 0; + v->pkt_out = 0; + v->link = dev->ifindex; + if (vif.vifc_flags&(VIFF_TUNNEL|VIFF_REGISTER)) + v->link = dev->iflink; + vifc_map|=(1<<vif.vifc_vifi); + if (vif.vifc_vifi+1 > maxvif) + maxvif = vif.vifc_vifi+1; + end_bh_atomic(); + return 0; + } else { + int ret; + rtnl_lock(); + ret = vif_delete(vif.vifc_vifi); + rtnl_unlock(); + return ret; + } + + /* + * Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ + case MRT_ADD_MFC: + case MRT_DEL_MFC: + if(optlen!=sizeof(mfc)) + return -EINVAL; + if (copy_from_user(&mfc,optval, sizeof(mfc))) + return -EFAULT; + return ipmr_mfc_modify(optname, &mfc); + /* + * Control PIM assert. + */ + case MRT_ASSERT: + { + int v; + if(get_user(v,(int *)optval)) + return -EFAULT; + mroute_do_assert=(v)?1:0; + return 0; + } +#ifdef CONFIG_IP_PIMSM + case MRT_PIM: + { + int v; + if(get_user(v,(int *)optval)) + return -EFAULT; + v = (v)?1:0; + if (v != mroute_do_pim) { + mroute_do_pim = v; + mroute_do_assert = v; +#ifdef CONFIG_IP_PIMSM_V2 + if (mroute_do_pim) + inet_add_protocol(&pim_protocol); + else + inet_del_protocol(&pim_protocol); +#endif + } + return 0; + } +#endif + /* + * Spurious command, or MRT_VERSION which you cannot + * set. + */ + default: + return -ENOPROTOOPT; + } +} + +/* + * Getsock opt support for the multicast routing system. + */ + +int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen) +{ + int olr; + int val; + + if(sk!=mroute_socket) + return -EACCES; + if(optname!=MRT_VERSION && +#ifdef CONFIG_IP_PIMSM + optname!=MRT_PIM && +#endif + optname!=MRT_ASSERT) + return -ENOPROTOOPT; + + if(get_user(olr, optlen)) + return -EFAULT; + + olr=min(olr,sizeof(int)); + if(put_user(olr,optlen)) + return -EFAULT; + if(optname==MRT_VERSION) + val=0x0305; +#ifdef CONFIG_IP_PIMSM + else if(optname==MRT_PIM) + val=mroute_do_pim; +#endif + else + val=mroute_do_assert; + if(copy_to_user(optval,&val,olr)) + return -EFAULT; + return 0; +} + +/* + * The IP multicast ioctl support routines. 
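+ *
+ * SIOCGETVIFCNT copies out the packet/byte counters of one VIF and
+ * SIOCGETSGCNT those of one (origin, group) cache entry.  A rough
+ * caller-side sketch (not part of this file; error handling omitted):
+ *
+ *	struct sioc_vif_req vr;
+ *	vr.vifi = 0;
+ *	ioctl(fd, SIOCGETVIFCNT, &vr);
+ *
+ * on return vr.icount/vr.ocount and vr.ibytes/vr.obytes hold the
+ * input/output packet and byte counts for that VIF.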
+ */ + +int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sioc_sg_req sr; + struct sioc_vif_req vr; + struct vif_device *vif; + struct mfc_cache *c; + + switch(cmd) + { + case SIOCGETVIFCNT: + if (copy_from_user(&vr,(void *)arg,sizeof(vr))) + return -EFAULT; + if(vr.vifi>=maxvif) + return -EINVAL; + vif=&vif_table[vr.vifi]; + if(vifc_map&(1<<vr.vifi)) + { + vr.icount=vif->pkt_in; + vr.ocount=vif->pkt_out; + vr.ibytes=vif->bytes_in; + vr.obytes=vif->bytes_out; + if (copy_to_user((void *)arg,&vr,sizeof(vr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + case SIOCGETSGCNT: + if (copy_from_user(&sr,(void *)arg,sizeof(sr))) + return -EFAULT; + for (c = mfc_cache_array[MFC_HASH(sr.grp.s_addr, sr.src.s_addr)]; + c; c = c->next) { + if (sr.grp.s_addr == c->mfc_mcastgrp && + sr.src.s_addr == c->mfc_origin) { + sr.pktcnt = c->mfc_pkt; + sr.bytecnt = c->mfc_bytes; + sr.wrong_if = c->mfc_wrong_if; + if (copy_to_user((void *)arg,&sr,sizeof(sr))) + return -EFAULT; + return 0; + } + } + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} + +/* + * Close the multicast socket, and clear the vif tables etc + */ + +void mroute_close(struct sock *sk) +{ + int i; + + /* + * Shut down all active vif entries + */ + rtnl_lock(); + for(i=0; i<maxvif; i++) + vif_delete(i); + rtnl_unlock(); + + /* + * Wipe the cache + */ + for(i=0;i<MFC_LINES;i++) + { + start_bh_atomic(); + while(mfc_cache_array[i]!=NULL) + ipmr_cache_delete(mfc_cache_array[i]); + end_bh_atomic(); + } +} + +static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct vif_device *v; + int ct; + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + v=&vif_table[0]; + for(ct=0;ct<maxvif;ct++) { + if (vifc_map&(1<<ct) && v->dev==ptr) + vif_delete(ct); + v++; + } + return NOTIFY_DONE; +} + + +static struct notifier_block ip_mr_notifier={ + ipmr_device_event, + NULL, + 0 +}; + +/* + * Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. 
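+ *
+ * ip_encap() just prepends a 20 byte outer header: tos and ttl are
+ * copied from the inner header, saddr/daddr are the tunnel endpoints,
+ * protocol is IPPROTO_IPIP and the checksum is set by ip_send_check().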
+ */ + +static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) +{ + struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); + + iph->version = 4; + iph->tos = skb->nh.iph->tos; + iph->ttl = skb->nh.iph->ttl; + iph->frag_off = 0; + iph->daddr = daddr; + iph->saddr = saddr; + iph->protocol = IPPROTO_IPIP; + iph->ihl = 5; + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + skb->h.ipiph = skb->nh.iph; + skb->nh.iph = iph; +} + +/* + * Processing handlers for ipmr_forward + */ + +static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, + int vifi, int last) +{ + struct iphdr *iph = skb->nh.iph; + struct vif_device *vif = &vif_table[vifi]; + struct device *dev; + struct rtable *rt; + int encap = 0; + struct sk_buff *skb2; + +#ifdef CONFIG_IP_PIMSM + if (vif->flags & VIFF_REGISTER) { + vif->pkt_out++; + vif->bytes_out+=skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); + return; + } +#endif + + if (vif->flags&VIFF_TUNNEL) { + if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link)) + return; + encap = sizeof(struct iphdr); + } else { + if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link)) + return; + } + + dev = rt->u.dst.dev; + + if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) { + /* Do not fragment multicasts. Alas, IPv4 does not + allow to send ICMP, so that packets will disappear + to blackhole. + */ + + ip_statistics.IpFragFails++; + ip_rt_put(rt); + return; + } + + encap += dev->hard_header_len; + + if (skb_headroom(skb) < encap || skb_cloned(skb) || !last) + skb2 = skb_realloc_headroom(skb, (encap + 15)&~15); + else if (atomic_read(&skb->users) != 1) + skb2 = skb_clone(skb, GFP_ATOMIC); + else { + atomic_inc(&skb->users); + skb2 = skb; + } + + if (skb2 == NULL) { + ip_rt_put(rt); + return; + } + + vif->pkt_out++; + vif->bytes_out+=skb->len; + + dst_release(skb2->dst); + skb2->dst = &rt->u.dst; + iph = skb2->nh.iph; + ip_decrease_ttl(iph); + +#ifdef CONFIG_FIREWALL + if (call_fw_firewall(PF_INET, vif->dev, skb2->nh.iph, NULL, &skb2) < FW_ACCEPT) { + kfree_skb(skb2); + return; + } + if (call_out_firewall(PF_INET, vif->dev, skb2->nh.iph, NULL, &skb2) < FW_ACCEPT) { + kfree_skb(skb2); + return; + } +#endif + if (vif->flags & VIFF_TUNNEL) { + ip_encap(skb2, vif->local, vif->remote); +#ifdef CONFIG_FIREWALL + /* Double output firewalling on tunnels: one is on tunnel + another one is on real device. + */ + if (call_out_firewall(PF_INET, dev, skb2->nh.iph, NULL, &skb2) < FW_ACCEPT) { + kfree_skb(skb2); + return; + } +#endif + ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; + ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len; + } + + IPCB(skb2)->flags |= IPSKB_FORWARDED; + + + /* + * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output + * interfaces. It is clear, if mrouter runs a multicasting + * program, it should receive packets not depending to what interface + * program is joined. + * If we will not make it, the program will have to join on all + * interfaces. On the other hand, multihoming host (or router, but + * not mrouter) cannot join to more than one interface - it will + * result in receiving multiple packets. 
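+ *
+ * (Packets that fit the path MTU go straight to dst->output(); larger
+ * ones are fragmented here, since DF-marked oversized packets were
+ * already dropped above.)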
+ */ + if (skb2->len <= rt->u.dst.pmtu) + skb2->dst->output(skb2); + else + ip_fragment(skb2, skb2->dst->output); +} + +int ipmr_find_vif(struct device *dev) +{ + int ct; + for (ct=0; ct<maxvif; ct++) { + if (vifc_map&(1<<ct) && vif_table[ct].dev == dev) + return ct; + } + return ALL_VIFS; +} + +/* "local" means that we should preserve one skb (for local delivery) */ + +int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) +{ + int psend = -1; + int vif, ct; + + vif = cache->mfc_parent; + cache->mfc_pkt++; + cache->mfc_bytes += skb->len; + + /* + * Wrong interface: drop packet and (maybe) send PIM assert. + */ + if (vif_table[vif].dev != skb->dev) { + int true_vifi; + + if (((struct rtable*)skb->dst)->key.iif == 0) { + /* It is our own packet, looped back. + Very complicated situation... + + The best workaround until routing daemons will be + fixed is not to redistribute packet, if it was + send through wrong interface. It means, that + multicast applications WILL NOT work for + (S,G), which have default multicast route pointing + to wrong oif. In any case, it is not a good + idea to use multicasting applications on router. + */ + goto dont_forward; + } + + cache->mfc_wrong_if++; + true_vifi = ipmr_find_vif(skb->dev); + + if (true_vifi < MAXVIFS && mroute_do_assert && + /* pimsm uses asserts, when switching from RPT to SPT, + so that we cannot check that packet arrived on an oif. + It is bad, but otherwise we would need to move pretty + large chunk of pimd to kernel. Ough... --ANK + */ + (mroute_do_pim || cache->mfc_ttls[true_vifi] < 255) && + jiffies - cache->mfc_last_assert > MFC_ASSERT_THRESH) { + cache->mfc_last_assert = jiffies; + ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); + } + goto dont_forward; + } + + vif_table[vif].pkt_in++; + vif_table[vif].bytes_in+=skb->len; + + /* + * Forward the frame + */ + for (ct = cache->mfc_maxvif-1; ct >= cache->mfc_minvif; ct--) { + if (skb->nh.iph->ttl > cache->mfc_ttls[ct]) { + if (psend != -1) + ipmr_queue_xmit(skb, cache, psend, 0); + psend=ct; + } + } + if (psend != -1) + ipmr_queue_xmit(skb, cache, psend, !local); + +dont_forward: + if (!local) + kfree_skb(skb); + return 0; +} + + +/* + * Multicast packets for forwarding arrive here + */ + +int ip_mr_input(struct sk_buff *skb) +{ + struct mfc_cache *cache; + int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; + + /* Packet is looped back after forward, it should not be + forwarded second time, but still can be delivered locally. + */ + if (IPCB(skb)->flags&IPSKB_FORWARDED) + goto dont_forward; + + if (!local) { + if (IPCB(skb)->opt.router_alert) { + if (ip_call_ra_chain(skb)) + return 0; + } else if (skb->nh.iph->protocol == IPPROTO_IGMP && mroute_socket) { + /* IGMPv1 (and broken IGMPv2 implementations sort of + Cisco IOS <= 11.2(8)) do not put router alert + option to IGMP packets destined to routable + groups. It is very bad, because it means + that we can forward NO IGMP messages. 
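+ *
+ * Any IGMP that arrives here without the router alert option is
+ * therefore handed straight to mrouted via raw_rcv() on mroute_socket.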
+ */ + raw_rcv(mroute_socket, skb); + return 0; + } + } + + cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); + + /* + * No usable cache entry + */ + + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + int vif; + + if (local) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + ip_local_deliver(skb); + if (skb2 == NULL) + return -ENOBUFS; + skb = skb2; + } + + vif = ipmr_find_vif(skb->dev); + if (vif != ALL_VIFS) { + ipmr_cache_unresolved(cache, vif, skb); + return -EAGAIN; + } + kfree_skb(skb); + return 0; + } + + ip_mr_forward(skb, cache, local); + + if (local) + return ip_local_deliver(skb); + return 0; + +dont_forward: + if (local) + return ip_local_deliver(skb); + kfree_skb(skb); + return 0; +} + +#ifdef CONFIG_IP_PIMSM_V1 +/* + * Handle IGMP messages of PIMv1 + */ + +int pim_rcv_v1(struct sk_buff * skb, unsigned short len) +{ + struct igmphdr *pim = (struct igmphdr*)skb->h.raw; + struct iphdr *encap; + + if (!mroute_do_pim || + len < sizeof(*pim) + sizeof(*encap) || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER || + reg_dev == NULL) { + kfree_skb(skb); + return -EINVAL; + } + + encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); + /* + Check that: + a. packet is really destinted to a multicast group + b. packet is not a NULL-REGISTER + c. packet is not truncated + */ + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb); + return -EINVAL; + } + skb->mac.raw = skb->nh.raw; + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + skb->dst = NULL; + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + netif_rx(skb); + return 0; +} +#endif + +#ifdef CONFIG_IP_PIMSM_V2 +int pim_rcv(struct sk_buff * skb, unsigned short len) +{ + struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw; + struct iphdr *encap; + + if (len < sizeof(*pim) + sizeof(*encap) || + pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || + (pim->flags&PIM_NULL_REGISTER) || + reg_dev == NULL || + (ip_compute_csum((void *)pim, sizeof(*pim)) && + ip_compute_csum((void *)pim, len))) { + kfree_skb(skb); + return -EINVAL; + } + + /* check if the inner packet is destined to mcast group */ + encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb); + return -EINVAL; + } + skb->mac.raw = skb->nh.raw; + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + skb->dst = NULL; + netif_rx(skb); + return 0; +} +#endif + +#ifdef CONFIG_RTNETLINK + +static int +ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) +{ + int ct; + struct rtnexthop *nhp; + struct device *dev = vif_table[c->mfc_parent].dev; + u8 *b = skb->tail; + struct rtattr *mp_head; + + if (dev) + RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); 
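+
+	/* Build one RTA_MULTIPATH attribute: one nexthop per output VIF,
+	 * with rtnh_hops carrying that VIF's TTL threshold. */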
+ + mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); + + for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) { + if (c->mfc_ttls[ct] < 255) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_ttls[ct]; + nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + } + } + mp_head->rta_type = RTA_MULTIPATH; + mp_head->rta_len = skb->tail - (u8*)mp_head; + rtm->rtm_type = RTN_MULTICAST; + return 1; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -EMSGSIZE; +} + +int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +{ + struct mfc_cache *cache; + struct rtable *rt = (struct rtable*)skb->dst; + + start_bh_atomic(); + cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + struct device *dev; + int vif; + int err; + + if (nowait) { + end_bh_atomic(); + return -EAGAIN; + } + + dev = skb->dev; + if (dev == NULL || (vif = ipmr_find_vif(dev)) == ALL_VIFS) { + end_bh_atomic(); + return -ENODEV; + } + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + skb->nh.iph->ihl = sizeof(struct iphdr)>>2; + skb->nh.iph->saddr = rt->rt_src; + skb->nh.iph->daddr = rt->rt_dst; + skb->nh.iph->version = 0; + err = ipmr_cache_unresolved(cache, vif, skb); + end_bh_atomic(); + return err; + } + /* Resolved cache entry is not changed by net bh, + so that we are allowed to enable it. + */ + end_bh_atomic(); + + if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) + cache->mfc_flags |= MFC_NOTIFY; + return ipmr_fill_mroute(skb, cache, rtm); +} +#endif + +/* + * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif + */ + +int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + struct vif_device *vif; + int len=0; + off_t pos=0; + off_t begin=0; + int size; + int ct; + + len += sprintf(buffer, + "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); + pos=len; + + for (ct=0;ct<maxvif;ct++) + { + char *name = "none"; + vif=&vif_table[ct]; + if(!(vifc_map&(1<<ct))) + continue; + if (vif->dev) + name = vif->dev->name; + size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", + ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, + vif->flags, vif->local, vif->remote); + len+=size; + pos+=size; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + struct mfc_cache *mfc; + int len=0; + off_t pos=0; + off_t begin=0; + int size; + int ct; + + len += sprintf(buffer, + "Group Origin Iif Pkts Bytes Wrong Oifs\n"); + pos=len; + + for (ct=0;ct<MFC_LINES;ct++) + { + start_bh_atomic(); + mfc=mfc_cache_array[ct]; + while(mfc!=NULL) + { + int n; + + /* + * Interface forwarding map + */ + size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld", + (unsigned long)mfc->mfc_mcastgrp, + (unsigned long)mfc->mfc_origin, + mfc->mfc_parent == ALL_VIFS ? -1 : mfc->mfc_parent, + (mfc->mfc_flags & MFC_QUEUED) ? 
mfc->mfc_unresolved.qlen : mfc->mfc_pkt, + mfc->mfc_bytes, + mfc->mfc_wrong_if); + for(n=mfc->mfc_minvif;n<mfc->mfc_maxvif;n++) + { + if(vifc_map&(1<<n) && mfc->mfc_ttls[n] < 255) + size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_ttls[n]); + } + size += sprintf(buffer+len+size, "\n"); + len+=size; + pos+=size; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + { + end_bh_atomic(); + goto done; + } + mfc=mfc->next; + } + end_bh_atomic(); + } +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if (len < 0) { + len = 0; + } + return len; +} + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_ipmr_vif = { + PROC_NET_IPMR_VIF, 9 ,"ip_mr_vif", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ipmr_vif_info +}; +static struct proc_dir_entry proc_net_ipmr_mfc = { + PROC_NET_IPMR_MFC, 11 ,"ip_mr_cache", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ipmr_mfc_info +}; +#endif + +#ifdef CONFIG_IP_PIMSM_V2 +struct inet_protocol pim_protocol = +{ + pim_rcv, /* PIM handler */ + NULL, /* PIM error control */ + NULL, /* next */ + IPPROTO_PIM, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "PIM" /* name */ +}; +#endif + + +/* + * Setup for IP multicast routing + */ + +__initfunc(void ip_mr_init(void)) +{ + printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n"); + register_netdevice_notifier(&ip_mr_notifier); +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_ipmr_vif); + proc_net_register(&proc_net_ipmr_mfc); +#endif +} diff --git a/pfinet/linux-src/net/ipv4/proc.c b/pfinet/linux-src/net/ipv4/proc.c new file mode 100644 index 00000000..1640a056 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/proc.c @@ -0,0 +1,387 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * This file implements the various access functions for the + * PROC file system. It is mainly used for debugging and + * statistics. + * + * Version: $Id: proc.c,v 1.34 1999/02/08 11:20:34 davem Exp $ + * + * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> + * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> + * Erik Schoenfelder, <schoenfr@ibr.cs.tu-bs.de> + * + * Fixes: + * Alan Cox : UDP sockets show the rxqueue/txqueue + * using hint flag for the netinfo. + * Pauline Middelink : identd support + * Alan Cox : Make /proc safer. + * Erik Schoenfelder : /proc/net/snmp + * Alan Cox : Handle dead sockets properly. + * Gerhard Koerting : Show both timers + * Alan Cox : Allow inode to be NULL (kernel socket) + * Andi Kleen : Add support for open_requests and + * split functions for more readibility. + * Andi Kleen : Add support for /proc/net/netstat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <asm/system.h> +#include <linux/sched.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/un.h> +#include <linux/in.h> +#include <linux/param.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/raw.h> + +/* Format a single open_request into tmpbuf. */ +static inline void get__openreq(struct sock *sk, struct open_request *req, + char *tmpbuf, + int i) +{ + sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u", + i, + (long unsigned int)req->af.v4_req.loc_addr, + ntohs(sk->sport), + (long unsigned int)req->af.v4_req.rmt_addr, + ntohs(req->rmt_port), + TCP_SYN_RECV, + 0,0, /* could print option size, but that is af dependent. */ + 1, /* timers active (only the expire timer) */ + (unsigned long)(req->expires - jiffies), + req->retrans, + sk->socket ? sk->socket->inode->i_uid : 0, + 0, /* non standard timer */ + 0 /* open_requests have no inode */ + ); +} + +/* Format a single socket into tmpbuf. */ +static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format) +{ + unsigned long dest, src; + unsigned short destp, srcp; + int timer_active, timer_active1, timer_active2; + int tw_bucket = 0; + unsigned long timer_expires; + struct tcp_opt *tp = &sp->tp_pinfo.af_tcp; + + dest = sp->daddr; + src = sp->rcv_saddr; + destp = sp->dport; + srcp = sp->sport; + + /* FIXME: The fact that retransmit_timer occurs as a field + * in two different parts of the socket structure is, + * to say the least, confusing. This code now uses the + * right retransmit_timer variable, but I'm not sure + * the rest of the timer stuff is still correct. + * In particular I'm not sure what the timeout value + * is suppose to reflect (as opposed to tm->when). -- erics + */ + + destp = ntohs(destp); + srcp = ntohs(srcp); + if((format == 0) && (sp->state == TCP_TIME_WAIT)) { + extern int tcp_tw_death_row_slot; + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp; + int slot_dist; + + tw_bucket = 1; + timer_active1 = timer_active2 = 0; + timer_active = 3; + slot_dist = tw->death_slot; + if(slot_dist > tcp_tw_death_row_slot) + slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot; + else + slot_dist = tcp_tw_death_row_slot - slot_dist; + timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD); + } else { + timer_active1 = del_timer(&tp->retransmit_timer); + timer_active2 = del_timer(&sp->timer); + if (!timer_active1) tp->retransmit_timer.expires=0; + if (!timer_active2) sp->timer.expires=0; + timer_active = 0; + timer_expires = (unsigned) -1; + } + if (timer_active1 && tp->retransmit_timer.expires < timer_expires) { + timer_active = 1; + timer_expires = tp->retransmit_timer.expires; + } + if (timer_active2 && sp->timer.expires < timer_expires) { + timer_active = 2; + timer_expires = sp->timer.expires; + } + if(timer_active == 0) + timer_expires = jiffies; + sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", + i, src, srcp, dest, destp, sp->state, + (tw_bucket ? + 0 : + (format == 0) ? + tp->write_seq-tp->snd_una : atomic_read(&sp->wmem_alloc)), + (tw_bucket ? + 0 : + (format == 0) ? + tp->rcv_nxt-tp->copied_seq: atomic_read(&sp->rmem_alloc)), + timer_active, timer_expires-jiffies, + (tw_bucket ? 0 : tp->retransmits), + (!tw_bucket && sp->socket) ? 
sp->socket->inode->i_uid : 0, + (!tw_bucket && timer_active) ? sp->timeout : 0, + (!tw_bucket && sp->socket) ? sp->socket->inode->i_ino : 0); + + if (timer_active1) add_timer(&tp->retransmit_timer); + if (timer_active2) add_timer(&sp->timer); +} + +/* + * Get__netinfo returns the length of that string. + * + * KNOWN BUGS + * As in get_unix_netinfo, the buffer might be too small. If this + * happens, get__netinfo returns only part of the available infos. + * + * Assumes that buffer length is a multiply of 128 - if not it will + * write past the end. + */ +static int +get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length) +{ + struct sock *sp, *next; + int len=0, i = 0; + off_t pos=0; + off_t begin; + char tmpbuf[129]; + + if (offset < 128) + len += sprintf(buffer, "%-127s\n", + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout inode"); + pos = 128; + SOCKHASH_LOCK(); + sp = pro->sklist_next; + while(sp != (struct sock *)pro) { + if (format == 0 && sp->state == TCP_LISTEN) { + struct open_request *req; + + for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req; + i++, req = req->dl_next) { + if (req->sk) + continue; + pos += 128; + if (pos < offset) + continue; + get__openreq(sp, req, tmpbuf, i); + len += sprintf(buffer+len, "%-127s\n", tmpbuf); + if(len >= length) + goto out; + } + } + + pos += 128; + if (pos < offset) + goto next; + + get__sock(sp, tmpbuf, i, format); + + len += sprintf(buffer+len, "%-127s\n", tmpbuf); + if(len >= length) + break; + next: + next = sp->sklist_next; + sp = next; + i++; + } +out: + SOCKHASH_UNLOCK(); + + begin = len - (pos - offset); + *start = buffer + begin; + len -= begin; + if(len>length) + len = length; + if (len<0) + len = 0; + return len; +} + +int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + return get__netinfo(&tcp_prot, buffer,0, start, offset, length); +} + +int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + return get__netinfo(&udp_prot, buffer,1, start, offset, length); +} + +int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + return get__netinfo(&raw_prot, buffer,1, start, offset, length); +} + +/* + * Report socket allocation statistics [mea@utu.fi] + */ +int afinet_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + /* From net/socket.c */ + extern int socket_get_info(char *, char **, off_t, int); + + int len = socket_get_info(buffer,start,offset,length); + + len += sprintf(buffer+len,"TCP: inuse %d highest %d\n", + tcp_prot.inuse, tcp_prot.highestinuse); + len += sprintf(buffer+len,"UDP: inuse %d highest %d\n", + udp_prot.inuse, udp_prot.highestinuse); + len += sprintf(buffer+len,"RAW: inuse %d highest %d\n", + raw_prot.inuse, raw_prot.highestinuse); + if (offset >= len) + { + *start = buffer; + return 0; + } + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + if (len < 0) + len = 0; + return len; +} + + +/* + * Called from the PROCfs module. This outputs /proc/net/snmp. 
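+ *
+ * Each protocol contributes a pair of lines - a header line naming the
+ * counters and a value line built with sprintf() - and the offset/length
+ * arithmetic at the end implements the usual /proc read window.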
+ */ + +int snmp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + extern struct tcp_mib tcp_statistics; + extern struct udp_mib udp_statistics; + int len; +/* + extern unsigned long tcp_rx_miss, tcp_rx_hit1,tcp_rx_hit2; +*/ + + len = sprintf (buffer, + "Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates\n" + "Ip: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + ip_statistics.IpForwarding, ip_statistics.IpDefaultTTL, + ip_statistics.IpInReceives, ip_statistics.IpInHdrErrors, + ip_statistics.IpInAddrErrors, ip_statistics.IpForwDatagrams, + ip_statistics.IpInUnknownProtos, ip_statistics.IpInDiscards, + ip_statistics.IpInDelivers, ip_statistics.IpOutRequests, + ip_statistics.IpOutDiscards, ip_statistics.IpOutNoRoutes, + ip_statistics.IpReasmTimeout, ip_statistics.IpReasmReqds, + ip_statistics.IpReasmOKs, ip_statistics.IpReasmFails, + ip_statistics.IpFragOKs, ip_statistics.IpFragFails, + ip_statistics.IpFragCreates); + + len += sprintf (buffer + len, + "Icmp: InMsgs InErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps\n" + "Icmp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + icmp_statistics.IcmpInMsgs, icmp_statistics.IcmpInErrors, + icmp_statistics.IcmpInDestUnreachs, icmp_statistics.IcmpInTimeExcds, + icmp_statistics.IcmpInParmProbs, icmp_statistics.IcmpInSrcQuenchs, + icmp_statistics.IcmpInRedirects, icmp_statistics.IcmpInEchos, + icmp_statistics.IcmpInEchoReps, icmp_statistics.IcmpInTimestamps, + icmp_statistics.IcmpInTimestampReps, icmp_statistics.IcmpInAddrMasks, + icmp_statistics.IcmpInAddrMaskReps, icmp_statistics.IcmpOutMsgs, + icmp_statistics.IcmpOutErrors, icmp_statistics.IcmpOutDestUnreachs, + icmp_statistics.IcmpOutTimeExcds, icmp_statistics.IcmpOutParmProbs, + icmp_statistics.IcmpOutSrcQuenchs, icmp_statistics.IcmpOutRedirects, + icmp_statistics.IcmpOutEchos, icmp_statistics.IcmpOutEchoReps, + icmp_statistics.IcmpOutTimestamps, icmp_statistics.IcmpOutTimestampReps, + icmp_statistics.IcmpOutAddrMasks, icmp_statistics.IcmpOutAddrMaskReps); + + len += sprintf (buffer + len, + "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts\n" + "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + tcp_statistics.TcpRtoAlgorithm, tcp_statistics.TcpRtoMin, + tcp_statistics.TcpRtoMax, tcp_statistics.TcpMaxConn, + tcp_statistics.TcpActiveOpens, tcp_statistics.TcpPassiveOpens, + tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets, + tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs, + tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs, + tcp_statistics.TcpInErrs, tcp_statistics.TcpOutRsts); + + len += sprintf (buffer + len, + "Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n", + udp_statistics.UdpInDatagrams, udp_statistics.UdpNoPorts, + udp_statistics.UdpInErrors, udp_statistics.UdpOutDatagrams); +/* + len += sprintf( buffer + len, + "TCP fast path RX: H2: %ul H1: %ul L: %ul\n", + 
tcp_rx_hit2,tcp_rx_hit1,tcp_rx_miss); +*/ + + if (offset >= len) + { + *start = buffer; + return 0; + } + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + if (len < 0) + len = 0; + return len; +} + +/* + * Output /proc/net/netstat + */ + +int netstat_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + extern struct linux_mib net_statistics; + int len; + + len = sprintf(buffer, + "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed" + " EmbryonicRsts PruneCalled RcvPruned OfoPruned" + " OutOfWindowIcmps LockDroppedIcmps\n" + "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + net_statistics.SyncookiesSent, + net_statistics.SyncookiesRecv, + net_statistics.SyncookiesFailed, + net_statistics.EmbryonicRsts, + net_statistics.PruneCalled, + net_statistics.RcvPruned, + net_statistics.OfoPruned, + net_statistics.OutOfWindowIcmps, + net_statistics.LockDroppedIcmps); + + if (offset >= len) + { + *start = buffer; + return 0; + } + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + if (len < 0) + len = 0; + return len; +} diff --git a/pfinet/linux-src/net/ipv4/protocol.c b/pfinet/linux-src/net/ipv4/protocol.c new file mode 100644 index 00000000..b47480be --- /dev/null +++ b/pfinet/linux-src/net/ipv4/protocol.c @@ -0,0 +1,211 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * INET protocol dispatch tables. + * + * Version: $Id: protocol.c,v 1.9 1997/10/29 20:27:34 kuznet Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : Ahah! udp icmp errors don't work because + * udp_err is never called! + * Alan Cox : Added new fields for init and ready for + * proper fragmentation (_NO_ 4K limits!) + * Richard Colella : Hang on hash collision + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/timer.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/ipip.h> +#include <linux/igmp.h> + +#define IPPROTO_PREVIOUS NULL + +#ifdef CONFIG_IP_MULTICAST + +static struct inet_protocol igmp_protocol = +{ + igmp_rcv, /* IGMP handler */ + NULL, /* IGMP error control */ + IPPROTO_PREVIOUS, /* next */ + IPPROTO_IGMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IGMP" /* name */ +}; + +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &igmp_protocol + +#endif + +static struct inet_protocol tcp_protocol = +{ + tcp_v4_rcv, /* TCP handler */ + tcp_v4_err, /* TCP error control */ + IPPROTO_PREVIOUS, + IPPROTO_TCP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "TCP" /* name */ +}; + +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &tcp_protocol + +static struct inet_protocol udp_protocol = +{ + udp_rcv, /* UDP handler */ + udp_err, /* UDP error control */ + IPPROTO_PREVIOUS, /* next */ + IPPROTO_UDP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "UDP" /* name */ +}; + +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &udp_protocol + + +static struct inet_protocol icmp_protocol = +{ + icmp_rcv, /* ICMP handler */ + NULL, /* ICMP error control */ + IPPROTO_PREVIOUS, /* next */ + IPPROTO_ICMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "ICMP" /* name */ +}; + +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &icmp_protocol + + +struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS; + +struct inet_protocol *inet_protos[MAX_INET_PROTOS] = +{ + NULL +}; + + +/* + * Find a protocol in the protocol tables given its + * IP type. + */ + +struct inet_protocol *inet_get_protocol(unsigned char prot) +{ + unsigned char hash; + struct inet_protocol *p; + + hash = prot & (MAX_INET_PROTOS - 1); + for (p = inet_protos[hash] ; p != NULL; p=p->next) + { + if (p->protocol == prot) + return((struct inet_protocol *) p); + } + return(NULL); +} + +/* + * Add a protocol handler to the hash tables + */ + +void inet_add_protocol(struct inet_protocol *prot) +{ + unsigned char hash; + struct inet_protocol *p2; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + prot ->next = inet_protos[hash]; + inet_protos[hash] = prot; + prot->copy = 0; + + /* + * Set the copy bit if we need to. + */ + + p2 = (struct inet_protocol *) prot->next; + while(p2 != NULL) + { + if (p2->protocol == prot->protocol) + { + prot->copy = 1; + break; + } + p2 = (struct inet_protocol *) p2->next; + } +} + +/* + * Remove a protocol from the hash tables. + */ + +int inet_del_protocol(struct inet_protocol *prot) +{ + struct inet_protocol *p; + struct inet_protocol *lp = NULL; + unsigned char hash; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + if (prot == inet_protos[hash]) + { + inet_protos[hash] = (struct inet_protocol *) inet_protos[hash]->next; + return(0); + } + + p = (struct inet_protocol *) inet_protos[hash]; + while(p != NULL) + { + /* + * We have to worry if the protocol being deleted is + * the last one on the list, then we may need to reset + * someone's copied bit. 
+ */ + if (p->next != NULL && p->next == prot) + { + /* + * if we are the last one with this protocol and + * there is a previous one, reset its copy bit. + */ + if (p->copy == 0 && lp != NULL) + lp->copy = 0; + p->next = prot->next; + return(0); + } + if (p->next != NULL && p->next->protocol == prot->protocol) + lp = p; + + p = (struct inet_protocol *) p->next; + } + return(-1); +} diff --git a/pfinet/linux-src/net/ipv4/rarp.c b/pfinet/linux-src/net/ipv4/rarp.c new file mode 100644 index 00000000..7f7c7e3f --- /dev/null +++ b/pfinet/linux-src/net/ipv4/rarp.c @@ -0,0 +1,606 @@ +/* linux/net/inet/rarp.c + * + * Copyright (C) 1994 by Ross Martin + * Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche + * + * $Id: rarp.c,v 1.25 1998/06/19 13:22:34 davem Exp $ + * + * This module implements the Reverse Address Resolution Protocol + * (RARP, RFC 903), which is used to convert low level addresses such + * as Ethernet addresses into high level addresses such as IP addresses. + * The most common use of RARP is as a means for a diskless workstation + * to discover its IP address during a network boot. + * + ** + *** WARNING:::::::::::::::::::::::::::::::::WARNING + **** + ***** SUN machines seem determined to boot solely from the person who + **** answered their RARP query. NEVER add a SUN to your RARP table + *** unless you have all the rest to boot the box from it. + ** + * + * Currently, only Ethernet address -> IP address is likely to work. + * (Is RARP ever used for anything else?) + * + * This code is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes + * Alan Cox : Rarp delete on device down needed as + * reported by Walter Wolfgang. + * Mike McLagan : Routing by source + * + */ + +#include <linux/module.h> + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/in.h> +#include <linux/config.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <stdarg.h> +#include <linux/inet.h> +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/rarp.h> +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#include <net/ax25.h> +#endif +#include <linux/proc_fs.h> +#include <linux/stat.h> + +extern int (*rarp_ioctl_hook)(unsigned int,void*); + +/* + * This structure defines the RARP mapping cache. As long as we make + * changes in this structure, we keep interrupts off. + */ + +struct rarp_table +{ + struct rarp_table *next; /* Linked entry list */ + unsigned long ip; /* ip address of entry */ + unsigned char ha[MAX_ADDR_LEN]; /* Hardware address */ + unsigned char hlen; /* Length of hardware address */ + unsigned char htype; /* Type of hardware in use */ + struct device *dev; /* Device the entry is tied to */ +}; + +struct rarp_table *rarp_tables = NULL; + +static int rarp_rcv(struct sk_buff *, struct device *, struct packet_type *); + +static struct packet_type rarp_packet_type = +{ + 0, /* Should be: __constant_htons(ETH_P_RARP) - but this _doesn't_ come out constant! 
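+	   (The real value is filled in at run time: rarp_init_pkt() sets
+	    rarp_packet_type.type = htons(ETH_P_RARP) before calling
+	    dev_add_pack().)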
*/ + 0, /* copy */ + rarp_rcv, + NULL, + NULL +}; + +static int initflag = 1; + + +/* + * Release the memory for this entry. + */ + +static inline void rarp_release_entry(struct rarp_table *entry) +{ + kfree_s(entry, sizeof(struct rarp_table)); + MOD_DEC_USE_COUNT; + return; +} + +/* + * Delete a RARP mapping entry in the cache. + */ + +static void rarp_destroy(unsigned long ip_addr) +{ + struct rarp_table *entry; + struct rarp_table **pentry; + + start_bh_atomic(); + pentry = &rarp_tables; + while ((entry = *pentry) != NULL) + { + if (entry->ip == ip_addr) + { + *pentry = entry->next; + end_bh_atomic(); + rarp_release_entry(entry); + return; + } + pentry = &entry->next; + } + end_bh_atomic(); +} + +/* + * Flush a device. + */ + +static void rarp_destroy_dev(struct device *dev) +{ + struct rarp_table *entry; + struct rarp_table **pentry; + + start_bh_atomic(); + pentry = &rarp_tables; + while ((entry = *pentry) != NULL) + { + if (entry->dev == dev) + { + *pentry = entry->next; + rarp_release_entry(entry); + } + else + pentry = &entry->next; + } + end_bh_atomic(); +} + +static int rarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + if(event!=NETDEV_DOWN) + return NOTIFY_DONE; + rarp_destroy_dev((struct device *)ptr); + return NOTIFY_DONE; +} + +/* + * Called once when data first added to rarp cache with ioctl. + */ + +static struct notifier_block rarp_dev_notifier={ + rarp_device_event, + NULL, + 0 +}; + +static int rarp_pkt_inited=0; + +static void rarp_init_pkt (void) +{ + /* Register the packet type */ + rarp_packet_type.type=htons(ETH_P_RARP); + dev_add_pack(&rarp_packet_type); + register_netdevice_notifier(&rarp_dev_notifier); + rarp_pkt_inited=1; +} + +#ifdef MODULE + +static void rarp_end_pkt(void) +{ + if(!rarp_pkt_inited) + return; + dev_remove_pack(&rarp_packet_type); + unregister_netdevice_notifier(&rarp_dev_notifier); + rarp_pkt_inited=0; +} + +#endif + +/* + * Receive an arp request by the device layer. Maybe it should be + * rewritten to use the incoming packet for the reply. The current + * "overhead" time isn't that high... + */ + +static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ +/* + * We shouldn't use this type conversion. Check later. + */ + struct arphdr *rarp = (struct arphdr *) skb->data; + unsigned char *rarp_ptr = skb_pull(skb,sizeof(struct arphdr)); + struct rarp_table *entry; + struct in_device *in_dev = dev->ip_ptr; + long sip,tip; + unsigned char *sha,*tha; /* s for "source", t for "target" */ + +/* + * If this test doesn't pass, it's not IP, or we should ignore it anyway + */ + + if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd) + || dev->flags&IFF_NOARP || !in_dev || !in_dev->ifa_list) + { + kfree_skb(skb); + return 0; + } + +/* + * If it's not a RARP request, delete it. + */ + if (rarp->ar_op != htons(ARPOP_RREQUEST)) + { + kfree_skb(skb); + return 0; + } + +/* + * For now we will only deal with IP addresses. + */ + + if ( +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + (rarp->ar_pro != htons(AX25_P_IP) && dev->type == ARPHRD_AX25) || +#endif + (rarp->ar_pro != htons(ETH_P_IP) && dev->type != ARPHRD_AX25) + || rarp->ar_pln != 4) + { + /* + * This packet is not for us. Remove it. + */ + kfree_skb(skb); + return 0; + } + +/* + * Extract variable width fields + */ + + sha=rarp_ptr; + rarp_ptr+=dev->addr_len; + memcpy(&sip,rarp_ptr,4); + rarp_ptr+=4; + tha=rarp_ptr; + rarp_ptr+=dev->addr_len; + memcpy(&tip,rarp_ptr,4); + +/* + * Process entry. 
Use tha for table lookup according to RFC903. + */ + + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (!memcmp(entry->ha, tha, rarp->ar_hln)) + break; + + if (entry != NULL) + { + sip=entry->ip; + + arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, in_dev->ifa_list->ifa_address, sha, + dev->dev_addr, sha); + } + + kfree_skb(skb); + return 0; +} + + +/* + * Set (create) a RARP cache entry. + */ + +static int rarp_req_set(struct arpreq *req) +{ + struct arpreq r; + struct rarp_table *entry; + struct sockaddr_in *si; + int htype, hlen; + unsigned long ip; + struct rtable *rt; + struct device * dev; + int err; + + err = copy_from_user(&r, req, sizeof(r)); + if (err) + return -EFAULT; + + /* + * We only understand about IP addresses... + */ + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + + switch (r.arp_ha.sa_family) + { + case ARPHRD_ETHER: + htype = ARPHRD_ETHER; + hlen = ETH_ALEN; + break; +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + case ARPHRD_AX25: + htype = ARPHRD_AX25; + hlen = 7; + break; +#endif + default: + return -EPFNOSUPPORT; + } + + si = (struct sockaddr_in *) &r.arp_pa; + ip = si->sin_addr.s_addr; + if (ip == 0) + { + printk(KERN_DEBUG "RARP: SETRARP: requested PA is 0.0.0.0 !\n"); + return -EINVAL; + } + +/* + * Is it reachable directly ? + */ + + err = ip_route_output(&rt, ip, 0, 1, 0); + if (err) + return err; + if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) { + ip_rt_put(rt); + return -EINVAL; + } + dev = rt->u.dst.dev; + +/* + * Is there an existing entry for this address? Find out... + */ + + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (entry->ip == ip) + break; + +/* + * If no entry was found, create a new one. + */ + + if (entry == NULL) + { + entry = (struct rarp_table *) kmalloc(sizeof(struct rarp_table), + GFP_ATOMIC); + if (entry == NULL) + { + return -ENOMEM; + } + if (initflag) + { + rarp_init_pkt(); + initflag=0; + } + + /* Block interrupts until table modification is finished */ + + cli(); + entry->next = rarp_tables; + rarp_tables = entry; + } + cli(); + entry->ip = ip; + entry->hlen = hlen; + entry->htype = htype; + memcpy(&entry->ha, &r.arp_ha.sa_data, hlen); + entry->dev = dev; + sti(); + + /* Don't unlink if we have entries to serve. */ + MOD_INC_USE_COUNT; + + return 0; +} + + +/* + * Get a RARP cache entry. + */ + +static int rarp_req_get(struct arpreq *req) +{ + struct arpreq r; + struct rarp_table *entry; + struct sockaddr_in *si; + unsigned long ip; + int err; + +/* + * We only understand about IP addresses... + */ + + err = copy_from_user(&r, req, sizeof(r)); + if (err) + return -EFAULT; + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + +/* + * Is there an existing entry for this address? + */ + + si = (struct sockaddr_in *) &r.arp_pa; + ip = si->sin_addr.s_addr; + + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (entry->ip == ip) + break; + + if (entry == NULL) + { + return -ENXIO; + } + +/* + * We found it; copy into structure. + */ + + memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen); + r.arp_ha.sa_family = entry->htype; + +/* + * Copy the information back + */ + + return copy_to_user(req, &r, sizeof(r)) ? -EFAULT : 0; +} + + +/* + * Handle a RARP layer I/O control request. 
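+ *
+ * SIOCSRARP and SIOCDRARP require superuser rights, SIOCGRARP does not.
+ * A rough caller-side sketch (not part of this file):
+ *
+ *	struct arpreq r;
+ *	struct sockaddr_in *si = (struct sockaddr_in *) &r.arp_pa;
+ *	memset(&r, 0, sizeof(r));
+ *	si->sin_family = AF_INET;
+ *	si->sin_addr.s_addr = inet_addr("10.0.0.2");
+ *	ioctl(fd, SIOCGRARP, &r);
+ *
+ * on success r.arp_ha then holds the cached hardware address.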
+ */ + +int rarp_ioctl(unsigned int cmd, void *arg) +{ + struct arpreq r; + struct sockaddr_in *si; + int err; + + switch(cmd) + { + case SIOCDRARP: + if (!suser()) + return -EPERM; + err = copy_from_user(&r, arg, sizeof(r)); + if (err) + return -EFAULT; + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + si = (struct sockaddr_in *) &r.arp_pa; + rarp_destroy(si->sin_addr.s_addr); + return 0; + + case SIOCGRARP: + + return rarp_req_get((struct arpreq *)arg); + case SIOCSRARP: + if (!suser()) + return -EPERM; + return rarp_req_set((struct arpreq *)arg); + default: + return -EINVAL; + } + + /*NOTREACHED*/ + return 0; +} + +#ifdef CONFIG_PROC_FS +int rarp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t begin=0; + off_t pos=0; + int size; + struct rarp_table *entry; + char ipbuffer[20]; + unsigned long netip; + if (initflag) + { + size = sprintf(buffer,"RARP disabled until entries added to cache.\n"); + pos+=size; + len+=size; + } + else + { + size = sprintf(buffer, + "IP address HW type HW address\n"); + pos+=size; + len+=size; + + for(entry=rarp_tables; entry!=NULL; entry=entry->next) + { + netip=htonl(entry->ip); /* switch to network order */ + sprintf(ipbuffer,"%d.%d.%d.%d", + (unsigned int)(netip>>24)&255, + (unsigned int)(netip>>16)&255, + (unsigned int)(netip>>8)&255, + (unsigned int)(netip)&255); + + size = sprintf(buffer+len, + "%-17s%-20s%02x:%02x:%02x:%02x:%02x:%02x\n", + ipbuffer, + "10Mbps Ethernet", + (unsigned int)entry->ha[0], + (unsigned int)entry->ha[1], + (unsigned int)entry->ha[2], + (unsigned int)entry->ha[3], + (unsigned int)entry->ha[4], + (unsigned int)entry->ha[5]); + + len+=size; + pos=begin+len; + + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + } + + *start = buffer+(offset-begin); /* Start of wanted data */ + len -= (offset-begin); /* Start slop */ + if (len>length) + len = length; /* Ending slop */ + return len; +} + +struct proc_dir_entry proc_net_rarp = { + PROC_NET_RARP, 4, "rarp", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rarp_get_info +}; +#endif + +__initfunc(void +rarp_init(void)) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_rarp); +#endif + rarp_ioctl_hook = rarp_ioctl; +} + +#ifdef MODULE + +int init_module(void) +{ + rarp_init(); + return 0; +} + +void cleanup_module(void) +{ + struct rarp_table *rt, *rt_next; +#ifdef CONFIG_PROC_FS + proc_net_unregister(PROC_NET_RARP); +#endif + rarp_ioctl_hook = NULL; + cli(); + /* Destroy the RARP-table */ + rt = rarp_tables; + rarp_tables = NULL; + sti(); + /* ... and free it. */ + for ( ; rt != NULL; rt = rt_next) { + rt_next = rt->next; + rarp_release_entry(rt); + } + rarp_end_pkt(); +} +#endif diff --git a/pfinet/linux-src/net/ipv4/raw.c b/pfinet/linux-src/net/ipv4/raw.c new file mode 100644 index 00000000..5e7910dd --- /dev/null +++ b/pfinet/linux-src/net/ipv4/raw.c @@ -0,0 +1,573 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * RAW - implementation of IP "raw" sockets. + * + * Version: $Id: raw.c,v 1.39.2.1 1999/06/20 20:14:50 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : verify_area() fixed up + * Alan Cox : ICMP error handling + * Alan Cox : EMSGSIZE if you send too big a packet + * Alan Cox : Now uses generic datagrams and shared skbuff + * library. No more peek crashes, no more backlogs + * Alan Cox : Checks sk->broadcast. + * Alan Cox : Uses skb_free_datagram/skb_copy_datagram + * Alan Cox : Raw passes ip options too + * Alan Cox : Setsocketopt added + * Alan Cox : Fixed error return for broadcasts + * Alan Cox : Removed wake_up calls + * Alan Cox : Use ttl/tos + * Alan Cox : Cleaned up old debugging + * Alan Cox : Use new kernel side addresses + * Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets. + * Alan Cox : BSD style RAW socket demultiplexing. + * Alan Cox : Beginnings of mrouted support. + * Alan Cox : Added IP_HDRINCL option. + * Alan Cox : Skip broadcast check if BSDism set. + * David S. Miller : New socket lookup architecture. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/mroute.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/raw.h> +#include <net/checksum.h> + +#ifdef CONFIG_IP_MROUTE +struct sock *mroute_socket=NULL; +#endif + +struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE]; + +static void raw_v4_hash(struct sock *sk) +{ + struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)]; + + SOCKHASH_LOCK(); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + +static void raw_v4_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + } + SOCKHASH_UNLOCK(); +} + +/* Grumble... icmp and ip_input want to get at this... */ +struct sock *raw_v4_lookup(struct sock *sk, unsigned short num, + unsigned long raddr, unsigned long laddr, int dif) +{ + struct sock *s = sk; + + SOCKHASH_LOCK(); + for(s = sk; s; s = s->next) { + if((s->num == num) && + !(s->dead && (s->state == TCP_CLOSE)) && + !(s->daddr && s->daddr != raddr) && + !(s->rcv_saddr && s->rcv_saddr != laddr) && + !(s->bound_dev_if && s->bound_dev_if != dif)) + break; /* gotcha */ + } + SOCKHASH_UNLOCK(); + return s; +} + +void raw_err (struct sock *sk, struct sk_buff *skb) +{ + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + u32 info = 0; + int err = 0; + int harderr = 0; + + /* Report error on raw socket, if: + 1. User requested ip_recverr. + 2. Socket is connected (otherwise the error indication + is useless without ip_recverr and error is hard. 
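Raw sockets are demultiplexed purely by IP protocol number: raw_v4_hash() picks a bucket with sk->num & (RAWV4_HTABLE_SIZE - 1), and raw_v4_lookup() walks that one chain, treating each zero address or interface field as a wildcard (it also skips dead, closed sockets). A hedged sketch of the match rule, using an invented raw_sock_key type:

#include <stdint.h>

/* Illustrative stand-in for the handful of sk fields the lookup consults. */
struct raw_sock_key {
    unsigned short num;      /* protocol number the socket was created with */
    uint32_t daddr;          /* connected peer address, 0 = any */
    uint32_t rcv_saddr;      /* bound local address, 0 = any */
    int bound_dev_if;        /* bound interface index, 0 = any */
};

/*
 * Mirror of the predicate in raw_v4_lookup(): the protocol number must
 * match exactly, while the remaining fields only constrain the match
 * when the socket actually set them (non-zero).
 */
static int raw_sock_matches(const struct raw_sock_key *s,
                            unsigned short num, uint32_t raddr,
                            uint32_t laddr, int dif)
{
    return s->num == num &&
           (!s->daddr        || s->daddr == raddr) &&
           (!s->rcv_saddr    || s->rcv_saddr == laddr) &&
           (!s->bound_dev_if || s->bound_dev_if == dif);
}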
+ */ + if (!sk->ip_recverr && sk->state != TCP_ESTABLISHED) + return; + + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + return; + case ICMP_PARAMETERPROB: + err = EPROTO; + info = ntohl(skb->h.icmph->un.gateway)>>24; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + err = EHOSTUNREACH; + if (code > NR_ICMP_UNREACH) + break; + err = icmp_err_convert[code].errno; + harderr = icmp_err_convert[code].fatal; + if (code == ICMP_FRAG_NEEDED) { + harderr = (sk->ip_pmtudisc != IP_PMTUDISC_DONT); + err = EMSGSIZE; + info = ntohs(skb->h.icmph->un.frag.mtu); + } + } + + if (sk->ip_recverr) + ip_icmp_error(sk, skb, err, 0, info, (u8 *)(skb->h.icmph + 1)); + + if (sk->ip_recverr || harderr) { + sk->err = err; + sk->error_report(sk); + } +} + +static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) +{ + /* Charge it to the socket. */ + + if (sock_queue_rcv_skb(sk,skb)<0) + { + ip_statistics.IpInDiscards++; + kfree_skb(skb); + return -1; + } + + ip_statistics.IpInDelivers++; + return 0; +} + +/* + * This should be the easiest of all, all we do is + * copy it into a buffer. All demultiplexing is done + * in ip.c + */ + +int raw_rcv(struct sock *sk, struct sk_buff *skb) +{ + /* Now we need to copy this into memory. */ + skb_trim(skb, ntohs(skb->nh.iph->tot_len)); + + skb->h.raw = skb->nh.raw; + + raw_rcv_skb(sk, skb); + return 0; +} + +struct rawfakehdr +{ + struct iovec *iov; + u32 saddr; +}; + +/* + * Send a RAW IP packet. + */ + +/* + * Callback support is trivial for SOCK_RAW + */ + +static int raw_getfrag(const void *p, char *to, unsigned int offset, unsigned int fraglen) +{ + struct rawfakehdr *rfh = (struct rawfakehdr *) p; + return memcpy_fromiovecend(to, rfh->iov, offset, fraglen); +} + +/* + * IPPROTO_RAW needs extra work. + */ + +static int raw_getrawfrag(const void *p, char *to, unsigned int offset, unsigned int fraglen) +{ + struct rawfakehdr *rfh = (struct rawfakehdr *) p; + + if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen)) + return -EFAULT; + + if (offset==0) { + struct iphdr *iph = (struct iphdr *)to; + if (!iph->saddr) + iph->saddr = rfh->saddr; + iph->check=0; + iph->tot_len=htons(fraglen); /* This is right as you can't frag + RAW packets */ + /* + * Deliberate breach of modularity to keep + * ip_build_xmit clean (well less messy). + */ + if (!iph->id) + iph->id = htons(ip_id_count++); + iph->check=ip_fast_csum((unsigned char *)iph, iph->ihl); + } + return 0; +} + +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipcm_cookie ipc; + struct rawfakehdr rfh; + struct rtable *rt = NULL; + int free = 0; + u32 daddr; + u8 tos; + int err; + + /* This check is ONLY to check for arithmetic overflow + on integer(!) len. Not more! Real check will be made + in ip_build_xmit --ANK + + BTW socket.c -> af_*.c -> ... make multiple + invalid conversions size_t -> int. We MUST repair it f.e. + by replacing all of them with size_t and revise all + the places sort of len += sizeof(struct iphdr) + If len was ULONG_MAX-10 it would be cathastrophe --ANK + */ + + if (len < 0 || len > 0xFFFF) + return -EMSGSIZE; + + /* + * Check the flags. + */ + + if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT)) + return(-EINVAL); + + /* + * Get and verify the address. 
+ */ + + if (msg->msg_namelen) { + struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) + return(-EINVAL); + if (usin->sin_family != AF_INET) { + static int complained; + if (!complained++) + printk(KERN_INFO "%s forgot to set AF_INET in raw sendmsg. Fix it!\n", current->comm); + if (usin->sin_family) + return -EINVAL; + } + daddr = usin->sin_addr.s_addr; + /* ANK: I did not forget to get protocol from port field. + * I just do not know, who uses this weirdness. + * IP_HDRINCL is much more convenient. + */ + } else { + if (sk->state != TCP_ESTABLISHED) + return(-EINVAL); + daddr = sk->daddr; + } + + ipc.addr = sk->saddr; + ipc.opt = NULL; + ipc.oif = sk->bound_dev_if; + + if (msg->msg_controllen) { + int tmp = ip_cmsg_send(msg, &ipc); + if (tmp) + return tmp; + if (ipc.opt) + free=1; + } + + rfh.saddr = ipc.addr; + ipc.addr = daddr; + + if (!ipc.opt) + ipc.opt = sk->opt; + + if (ipc.opt) { + err = -EINVAL; + /* Linux does not mangle headers on raw sockets, + * so that IP options + IP_HDRINCL is non-sense. + */ + if (sk->ip_hdrincl) + goto done; + if (ipc.opt->srr) { + if (!daddr) + goto done; + daddr = ipc.opt->faddr; + } + } + tos = RT_TOS(sk->ip_tos) | sk->localroute; + if (msg->msg_flags&MSG_DONTROUTE) + tos |= RTO_ONLINK; + + if (MULTICAST(daddr)) { + if (!ipc.oif) + ipc.oif = sk->ip_mc_index; + if (!rfh.saddr) + rfh.saddr = sk->ip_mc_addr; + } + + err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif); + + if (err) + goto done; + + err = -EACCES; + if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) + goto done; + + rfh.iov = msg->msg_iov; + rfh.saddr = rt->rt_src; + if (!ipc.addr) + ipc.addr = rt->rt_dst; + err=ip_build_xmit(sk, sk->ip_hdrincl ? raw_getrawfrag : raw_getfrag, + &rfh, len, &ipc, rt, msg->msg_flags); + +done: + if (free) + kfree(ipc.opt); + ip_rt_put(rt); + + return err<0 ? err : len; +} + +static void raw_close(struct sock *sk, long timeout) +{ + /* Observation: when raw_close is called, processes have + no access to socket anymore. But net still has. + Step one, detach it from networking: + + A. Remove from hash tables. + */ + sk->state = TCP_CLOSE; + raw_v4_unhash(sk); + /* + B. Raw sockets may have direct kernel refereneces. Kill them. + */ + ip_ra_control(sk, 0, NULL); + + /* In this point socket cannot receive new packets anymore */ + + + /* But we still have packets pending on receive + queue and probably, our own packets waiting in device queues. + sock_destroy will drain receive queue, but transmitted + packets will delay socket destruction. + Set sk->dead=1 in order to prevent wakeups, when these + packet will be freed. + */ + sk->dead=1; + destroy_sock(sk); + + /* That's all. No races here. */ +} + +/* This gets rid of all the nasties in af_inet. -DaveM */ +static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if((sk->state != TCP_CLOSE) || (addr_len < sizeof(struct sockaddr_in))) + return -EINVAL; + chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + if(addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) { +#ifdef CONFIG_IP_TRANSPARENT_PROXY + /* Superuser may bind to any address to allow transparent proxying. 
*/ + if(chk_addr_ret != RTN_UNICAST || !capable(CAP_NET_ADMIN)) +#endif + return -EADDRNOTAVAIL; + } + sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + sk->saddr = 0; /* Use device */ + dst_release(xchg(&sk->dst_cache, NULL)); + return 0; +} + +/* + * This should be easy, if there is something there + * we return it, otherwise we block. + */ + +int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags,int *addr_len) +{ + int copied=0; + struct sk_buff *skb; + int err; + struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + if (addr_len) + *addr_len=sizeof(*sin); + + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + + skb=skb_recv_datagram(sk,flags,noblock,&err); + if(skb==NULL) + return err; + + copied = skb->len; + if (len < copied) + { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto done; + + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (sin) { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = skb->nh.iph->saddr; + } + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); +done: + skb_free_datagram(sk, skb); + return (err ? : copied); +} + +static int raw_init(struct sock *sk) +{ + struct raw_opt *tp = &(sk->tp_pinfo.tp_raw4); + if (sk->num == IPPROTO_ICMP) + memset(&tp->filter, 0, sizeof(tp->filter)); + return 0; +} + +static int raw_seticmpfilter(struct sock *sk, char *optval, int optlen) +{ + if (optlen > sizeof(struct icmp_filter)) + optlen = sizeof(struct icmp_filter); + if (copy_from_user(&sk->tp_pinfo.tp_raw4.filter, optval, optlen)) + return -EFAULT; + return 0; +} + +static int raw_geticmpfilter(struct sock *sk, char *optval, int *optlen) +{ + int len; + + if (get_user(len,optlen)) + return -EFAULT; + if (len > sizeof(struct icmp_filter)) + len = sizeof(struct icmp_filter); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sk->tp_pinfo.tp_raw4.filter, len)) + return -EFAULT; + return 0; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (level != SOL_RAW) + return ip_setsockopt(sk, level, optname, optval, optlen); + + switch (optname) { + case ICMP_FILTER: + if (sk->num != IPPROTO_ICMP) + return -EOPNOTSUPP; + return raw_seticmpfilter(sk, optval, optlen); + }; + + return -ENOPROTOOPT; +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + if (level != SOL_RAW) + return ip_getsockopt(sk, level, optname, optval, optlen); + + switch (optname) { + case ICMP_FILTER: + if (sk->num != IPPROTO_ICMP) + return -EOPNOTSUPP; + return raw_geticmpfilter(sk, optval, optlen); + }; + + return -ENOPROTOOPT; +} + +struct proto raw_prot = { + (struct sock *)&raw_prot, /* sklist_next */ + (struct sock *)&raw_prot, /* sklist_prev */ + raw_close, /* close */ + udp_connect, /* connect */ + NULL, /* accept */ + NULL, /* retransmit */ + NULL, /* write_wakeup */ + NULL, /* read_wakeup */ + datagram_poll, /* poll */ +#ifdef CONFIG_IP_MROUTE + ipmr_ioctl, /* ioctl */ +#else + NULL, /* ioctl */ +#endif + raw_init, /* init */ + NULL, /* destroy */ + NULL, /* shutdown */ + raw_setsockopt, /* setsockopt */ + raw_getsockopt, /* getsockopt */ + raw_sendmsg, /* sendmsg */ + raw_recvmsg, /* recvmsg */ + raw_bind, /* bind */ + raw_rcv_skb, /* backlog_rcv */ + raw_v4_hash, /* hash */ + raw_v4_unhash, /* unhash */ + 
NULL, /* get_port */ + 128, /* max_header */ + 0, /* retransmits */ + "RAW", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ +}; diff --git a/pfinet/linux-src/net/ipv4/route.c b/pfinet/linux-src/net/ipv4/route.c new file mode 100644 index 00000000..06eb5fe5 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/route.c @@ -0,0 +1,2048 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * ROUTE - implementation of the IP router. + * + * Version: $Id: route.c,v 1.67.2.3 1999/08/08 08:43:12 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Linus Torvalds, <Linus.Torvalds@helsinki.fi> + * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * Fixes: + * Alan Cox : Verify area fixes. + * Alan Cox : cli() protects routing changes + * Rui Oliveira : ICMP routing table updates + * (rco@di.uminho.pt) Routing table insertion and update + * Linus Torvalds : Rewrote bits to be sensible + * Alan Cox : Added BSD route gw semantics + * Alan Cox : Super /proc >4K + * Alan Cox : MTU in route table + * Alan Cox : MSS actually. Also added the window + * clamper. + * Sam Lantinga : Fixed route matching in rt_del() + * Alan Cox : Routing cache support. + * Alan Cox : Removed compatibility cruft. + * Alan Cox : RTF_REJECT support. + * Alan Cox : TCP irtt support. + * Jonathan Naylor : Added Metric support. + * Miquel van Smoorenburg : BSD API fixes. + * Miquel van Smoorenburg : Metrics. + * Alan Cox : Use __u32 properly + * Alan Cox : Aligned routing errors more closely with BSD + * our system is still very different. + * Alan Cox : Faster /proc handling + * Alexey Kuznetsov : Massive rework to support tree based routing, + * routing caches and better behaviour. + * + * Olaf Erb : irtt wasn't being copied right. + * Bjorn Ekwall : Kerneld route support. + * Alan Cox : Multicast fixed (I hope) + * Pavel Krauz : Limited broadcast fixed + * Mike McLagan : Routing by source + * Alexey Kuznetsov : End of old history. Splitted to fib.c and + * route.c and rewritten from scratch. + * Andi Kleen : Load-limit warning messages. + * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Vitaly E. Lavrov : Race condition in ip_route_input_slow. + * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. + * Vladimir V. Ivanov : IP rule info (flowid) is really useful. + * Marc Boucher : routing by fwmark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/pkt_sched.h> +#include <linux/mroute.h> +#include <net/protocol.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/ip_fib.h> +#include <net/arp.h> +#include <net/tcp.h> +#include <net/icmp.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#define IP_MAX_MTU 0xFFF0 + +#define RT_GC_TIMEOUT (300*HZ) + +int ip_rt_min_delay = 2*HZ; +int ip_rt_max_delay = 10*HZ; +int ip_rt_gc_thresh = RT_HASH_DIVISOR; +int ip_rt_max_size = RT_HASH_DIVISOR*16; +int ip_rt_gc_timeout = RT_GC_TIMEOUT; +int ip_rt_gc_interval = 60*HZ; +int ip_rt_gc_min_interval = 5*HZ; +int ip_rt_redirect_number = 9; +int ip_rt_redirect_load = HZ/50; +int ip_rt_redirect_silence = ((HZ/50) << (9+1)); +int ip_rt_error_cost = HZ; +int ip_rt_error_burst = 5*HZ; +int ip_rt_gc_elasticity = 8; +int ip_rt_mtu_expires = 10*60*HZ; + +static unsigned long rt_deadline = 0; + +#define RTprint(a...) printk(KERN_DEBUG a) + +static void rt_run_flush(unsigned long dummy); + +static struct timer_list rt_flush_timer = + { NULL, NULL, 0, 0L, rt_run_flush }; +static struct timer_list rt_periodic_timer = + { NULL, NULL, 0, 0L, NULL }; + +/* + * Interface to generic destination cache. + */ + +static struct dst_entry * ipv4_dst_check(struct dst_entry * dst, u32); +static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst, + struct sk_buff *); +static struct dst_entry * ipv4_negative_advice(struct dst_entry *); +static void ipv4_link_failure(struct sk_buff *skb); +static int rt_garbage_collect(void); + + +struct dst_ops ipv4_dst_ops = +{ + AF_INET, + __constant_htons(ETH_P_IP), + RT_HASH_DIVISOR, + + rt_garbage_collect, + ipv4_dst_check, + ipv4_dst_reroute, + NULL, + ipv4_negative_advice, + ipv4_link_failure, +}; + +__u8 ip_tos2prio[16] = { + TC_PRIO_BESTEFFORT, + TC_PRIO_FILLER, + TC_PRIO_BESTEFFORT, + TC_PRIO_FILLER, + TC_PRIO_BULK, + TC_PRIO_FILLER, + TC_PRIO_BULK, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE_BULK, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE_BULK, + TC_PRIO_FILLER +}; + + +/* + * Route cache. 
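The cache declared below is a fixed array of RT_HASH_DIVISOR chains, and rt_hash_code(), defined a few lines further down, folds destination, source and TOS into a single byte (hence the final & 0xFF). A standalone restatement of that fold, useful for reasoning about bucket distribution; the helper name is purely illustrative:

#include <stdint.h>

/*
 * Same folding as rt_hash_code(): swap the nibbles of daddr, mix in
 * saddr and tos, then fold the 32-bit value down to 8 bits.
 */
static unsigned rt_hash_sketch(uint32_t daddr, uint32_t saddr, uint8_t tos)
{
    unsigned hash = ((daddr & 0xF0F0F0F0u) >> 4) | ((daddr & 0x0F0F0F0Fu) << 4);

    hash ^= saddr ^ tos;
    hash ^= hash >> 16;
    return (hash ^ (hash >> 8)) & 0xFF;   /* one of 256 buckets */
}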
+ */ + +struct rtable *rt_hash_table[RT_HASH_DIVISOR]; + +static int rt_intern_hash(unsigned hash, struct rtable * rth, struct rtable ** res); + +static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos) +{ + unsigned hash = ((daddr&0xF0F0F0F0)>>4)|((daddr&0x0F0F0F0F)<<4); + hash = hash^saddr^tos; + hash = hash^(hash>>16); + return (hash^(hash>>8)) & 0xFF; +} + +#ifdef CONFIG_PROC_FS + +static int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t pos=0; + char temp[129]; + struct rtable *r; + int i; + + pos = 128; + + if (offset<128) { + sprintf(buffer,"%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\tHHUptod\tSpecDst"); + len = 128; + } + + + start_bh_atomic(); + + for (i = 0; i<RT_HASH_DIVISOR; i++) { + for (r = rt_hash_table[i]; r; r = r->u.rt_next) { + /* + * Spin through entries until we are ready + */ + pos += 128; + + if (pos <= offset) { + len = 0; + continue; + } + sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X", + r->u.dst.dev ? r->u.dst.dev->name : "*", + (unsigned long)r->rt_dst, + (unsigned long)r->rt_gateway, + r->rt_flags, + atomic_read(&r->u.dst.use), + atomic_read(&r->u.dst.refcnt), + 0, + (unsigned long)r->rt_src, (int)r->u.dst.pmtu, + r->u.dst.window, + (int)r->u.dst.rtt, r->key.tos, + r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, + r->u.dst.hh ? (r->u.dst.hh->hh_output == dev_queue_xmit) : 0, + r->rt_spec_dst); + sprintf(buffer+len,"%-127s\n",temp); + len += 128; + if (pos >= offset+length) + goto done; + } + } + +done: + end_bh_atomic(); + + *start = buffer+len-(pos-offset); + len = pos-offset; + if (len>length) + len = length; + return len; +} +#endif + +static __inline__ void rt_free(struct rtable *rt) +{ + dst_free(&rt->u.dst); +} + +static __inline__ void rt_drop(struct rtable *rt) +{ + ip_rt_put(rt); + dst_free(&rt->u.dst); +} + +static __inline__ int rt_fast_clean(struct rtable *rth) +{ + /* Kill broadcast/multicast entries very aggresively, if they + collide in hash table with more useful entries */ + return ((rth->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) + && rth->key.iif && rth->u.rt_next); +} + +static __inline__ int rt_valuable(struct rtable *rth) +{ + return ((rth->rt_flags&(RTCF_REDIRECTED|RTCF_NOTIFY)) + || rth->u.dst.expires); +} + +static __inline__ int rt_may_expire(struct rtable *rth, int tmo1, int tmo2) +{ + int age; + + if (atomic_read(&rth->u.dst.use)) + return 0; + + if (rth->u.dst.expires && (long)(rth->u.dst.expires - jiffies) <= 0) + return 1; + + age = jiffies - rth->u.dst.lastuse; + if (age <= tmo1 && !rt_fast_clean(rth)) + return 0; + if (age <= tmo2 && rt_valuable(rth)) + return 0; + return 1; +} + +static void rt_check_expire(unsigned long dummy) +{ + int i; + static int rover; + struct rtable *rth, **rthp; + unsigned long now = jiffies; + + for (i=0; i<RT_HASH_DIVISOR/5; i++) { + unsigned tmo = ip_rt_gc_timeout; + + rover = (rover + 1) & (RT_HASH_DIVISOR-1); + rthp = &rt_hash_table[rover]; + + while ((rth = *rthp) != NULL) { + if (rth->u.dst.expires) { + /* Entrie is expired even if it is in use */ + if ((long)(now - rth->u.dst.expires) <= 0) { + tmo >>= 1; + rthp = &rth->u.rt_next; + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { + tmo >>= 1; + rthp = &rth->u.rt_next; + continue; + } + + /* + * Cleanup aged off entries. + */ + *rthp = rth->u.rt_next; + rt_free(rth); + } + + /* Fallback loop breaker. 
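The aging decision shared by the periodic timer and the garbage collector lives in rt_may_expire() above: an entry still in use is never dropped, a hard-expired one always is, and otherwise two age limits apply, a short one for easily recreated broadcast/multicast entries and a longer one for "valuable" (redirected or expiring) ones. A compact model of that rule, with the struct fields flattened into parameters:

/*
 * Paraphrase of rt_may_expire(); returns non-zero when the entry may be
 * evicted.  All inputs are plain values instead of struct rtable fields,
 * so this is a model of the rule, not kernel code.
 */
static int may_expire(int in_use, int hard_expired,
                      long age, long tmo_fast, long tmo_valuable,
                      int fast_clean, int valuable)
{
    if (in_use)
        return 0;                      /* still referenced: never evict */
    if (hard_expired)
        return 1;                      /* explicit expiry (e.g. PMTU) passed */
    if (age <= tmo_fast && !fast_clean)
        return 0;                      /* fresh ordinary entry: keep */
    if (age <= tmo_valuable && valuable)
        return 0;                      /* redirected/expiring entry: keep longer */
    return 1;
}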
*/ + if ((jiffies - now) > 0) + break; + } + rt_periodic_timer.expires = now + ip_rt_gc_interval; + add_timer(&rt_periodic_timer); +} + +static void rt_run_flush(unsigned long dummy) +{ + int i; + struct rtable * rth, * next; + + rt_deadline = 0; + + start_bh_atomic(); + for (i=0; i<RT_HASH_DIVISOR; i++) { + if ((rth = xchg(&rt_hash_table[i], NULL)) == NULL) + continue; + end_bh_atomic(); + + for (; rth; rth=next) { + next = rth->u.rt_next; + rth->u.rt_next = NULL; + rt_free(rth); + } + + start_bh_atomic(); + } + end_bh_atomic(); +} + +void rt_cache_flush(int delay) +{ + unsigned long now = jiffies; + int user_mode = !in_interrupt(); + + if (delay < 0) + delay = ip_rt_min_delay; + + start_bh_atomic(); + + if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { + long tmo = (long)(rt_deadline - now); + + /* If flush timer is already running + and flush request is not immediate (delay > 0): + + if deadline is not achieved, prolongate timer to "delay", + otherwise fire it at deadline time. + */ + + if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) + tmo = 0; + + if (delay > tmo) + delay = tmo; + } + + if (delay <= 0) { + end_bh_atomic(); + rt_run_flush(0); + return; + } + + if (rt_deadline == 0) + rt_deadline = now + ip_rt_max_delay; + + rt_flush_timer.expires = now + delay; + add_timer(&rt_flush_timer); + end_bh_atomic(); +} + +/* + Short description of GC goals. + + We want to build algorithm, which will keep routing cache + at some equilibrium point, when number of aged off entries + is kept approximately equal to newly generated ones. + + Current expiration strength is variable "expire". + We try to adjust it dynamically, so that if networking + is idle expires is large enough to keep enough of warm entries, + and when load increases it reduces to limit cache size. + */ + +static int rt_garbage_collect(void) +{ + static unsigned expire = RT_GC_TIMEOUT; + static unsigned long last_gc; + static int rover; + static int equilibrium; + struct rtable *rth, **rthp; + unsigned long now = jiffies; + int goal; + + /* + * Garbage collection is pretty expensive, + * do not make it too frequently. + */ + if (now - last_gc < ip_rt_gc_min_interval && + atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) + return 0; + + /* Calculate number of entries, which we want to expire now. */ + goal = atomic_read(&ipv4_dst_ops.entries) - RT_HASH_DIVISOR*ip_rt_gc_elasticity; + if (goal <= 0) { + if (equilibrium < ipv4_dst_ops.gc_thresh) + equilibrium = ipv4_dst_ops.gc_thresh; + goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; + if (goal > 0) { + equilibrium += min(goal/2, RT_HASH_DIVISOR); + goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; + } + } else { + /* We are in dangerous area. Try to reduce cache really + * aggressively. + */ + goal = max(goal/2, RT_HASH_DIVISOR); + equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; + } + + if (now - last_gc >= ip_rt_gc_min_interval) + last_gc = now; + + if (goal <= 0) { + equilibrium += goal; + goto work_done; + } + + do { + int i, k; + + start_bh_atomic(); + for (i=0, k=rover; i<RT_HASH_DIVISOR; i++) { + unsigned tmo = expire; + + k = (k + 1) & (RT_HASH_DIVISOR-1); + rthp = &rt_hash_table[k]; + while ((rth = *rthp) != NULL) { + if (!rt_may_expire(rth, tmo, expire)) { + tmo >>= 1; + rthp = &rth->u.rt_next; + continue; + } + *rthp = rth->u.rt_next; + rth->u.rt_next = NULL; + rt_free(rth); + goal--; + } + if (goal <= 0) + break; + } + rover = k; + end_bh_atomic(); + + if (goal <= 0) + goto work_done; + + /* Goal is not achieved. 
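rt_garbage_collect() works toward an equilibrium size rather than a hard cap: it computes how many entries should go, then sweeps the table with an eviction age threshold that shrinks (the code below halves `expire`) whenever a pass fails to reach the goal. The loop skeleton below restates only that control flow; sweep_once() is a hypothetical helper, not a kernel function:

/*
 * Control-flow sketch of the collector's inner loop.  goal is the number
 * of entries we want gone; sweep_once(threshold) stands in for the real
 * hash-table walk, freeing every unreferenced entry older than
 * `threshold` and returning how many it freed.
 */
static void gc_pressure_loop(long goal, unsigned long expire,
                             long (*sweep_once)(unsigned long threshold))
{
    while (goal > 0 && expire > 0) {
        goal -= sweep_once(expire);
        expire >>= 1;          /* goal not met: shrink the age threshold */
    }
}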
We stop process if: + + - if expire reduced to zero. Otherwise, expire is halfed. + - if table is not full. + - if we are called from interrupt. + - jiffies check is just fallback/debug loop breaker. + We will not spin here for long time in any case. + */ + + if (expire == 0) + break; + + expire >>= 1; +#if RT_CACHE_DEBUG >= 2 + printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, atomic_read(&ipv4_dst_ops.entries), goal, i); +#endif + + if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) + return 0; + } while (!in_interrupt() && jiffies - now < 1); + + if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) + return 0; + if (net_ratelimit()) + printk("dst cache overflow\n"); + return 1; + +work_done: + expire += ip_rt_gc_min_interval; + if (expire > ip_rt_gc_timeout || + atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) + expire = ip_rt_gc_timeout; +#if RT_CACHE_DEBUG >= 2 + printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, atomic_read(&ipv4_dst_ops.entries), goal, rover); +#endif + return 0; +} + +static int rt_intern_hash(unsigned hash, struct rtable * rt, struct rtable ** rp) +{ + struct rtable *rth, **rthp; + unsigned long now = jiffies; + int attempts = !in_interrupt(); + +restart: + start_bh_atomic(); + + rthp = &rt_hash_table[hash]; + + while ((rth = *rthp) != NULL) { + if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) { + /* Put it first */ + *rthp = rth->u.rt_next; + rth->u.rt_next = rt_hash_table[hash]; + rt_hash_table[hash] = rth; + + atomic_inc(&rth->u.dst.refcnt); + atomic_inc(&rth->u.dst.use); + rth->u.dst.lastuse = now; + end_bh_atomic(); + + rt_drop(rt); + *rp = rth; + return 0; + } + + rthp = &rth->u.rt_next; + } + + /* Try to bind route to arp only if it is output + route or unicast forwarding path. + */ + if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) { + if (!arp_bind_neighbour(&rt->u.dst)) { + end_bh_atomic(); + + /* Neighbour tables are full and nothing + can be released. Try to shrink route cache, + it is most likely it holds some neighbour records. + */ + if (attempts-- > 0) { + int saved_elasticity = ip_rt_gc_elasticity; + int saved_int = ip_rt_gc_min_interval; + ip_rt_gc_elasticity = 1; + ip_rt_gc_min_interval = 0; + rt_garbage_collect(); + ip_rt_gc_min_interval = saved_int; + ip_rt_gc_elasticity = saved_elasticity; + goto restart; + } + + rt_drop(rt); + if (net_ratelimit()) + printk("neighbour table overflow\n"); + return -ENOBUFS; + } + } + + rt->u.rt_next = rt_hash_table[hash]; +#if RT_CACHE_DEBUG >= 2 + if (rt->u.rt_next) { + struct rtable * trt; + printk("rt_cache @%02x: %08x", hash, rt->rt_dst); + for (trt=rt->u.rt_next; trt; trt=trt->u.rt_next) + printk(" . 
%08x", trt->rt_dst); + printk("\n"); + } +#endif + rt_hash_table[hash] = rt; + end_bh_atomic(); + *rp = rt; + return 0; +} + +static void rt_del(unsigned hash, struct rtable *rt) +{ + struct rtable **rthp; + + start_bh_atomic(); + ip_rt_put(rt); + for (rthp = &rt_hash_table[hash]; *rthp; rthp = &(*rthp)->u.rt_next) { + if (*rthp == rt) { + *rthp = rt->u.rt_next; + rt_free(rt); + break; + } + } + end_bh_atomic(); +} + +void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, + u32 saddr, u8 tos, struct device *dev) +{ + int i, k; + struct in_device *in_dev = dev->ip_ptr; + struct rtable *rth, **rthp; + u32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; + + tos &= IPTOS_TOS_MASK; + + if (!in_dev) + return; + + if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) + || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw)) + goto reject_redirect; + + if (!IN_DEV_SHARED_MEDIA(in_dev)) { + if (!inet_addr_onlink(in_dev, new_gw, old_gw)) + goto reject_redirect; + if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) + goto reject_redirect; + } else { + if (inet_addr_type(new_gw) != RTN_UNICAST) + goto reject_redirect; + } + + for (i=0; i<2; i++) { + for (k=0; k<2; k++) { + unsigned hash = rt_hash_code(daddr, skeys[i]^(ikeys[k]<<5), tos); + + rthp=&rt_hash_table[hash]; + + while ( (rth = *rthp) != NULL) { + struct rtable *rt; + + if (rth->key.dst != daddr || + rth->key.src != skeys[i] || + rth->key.tos != tos || + rth->key.oif != ikeys[k] || + rth->key.iif != 0) { + rthp = &rth->u.rt_next; + continue; + } + + if (rth->rt_dst != daddr || + rth->rt_src != saddr || + rth->u.dst.error || + rth->rt_gateway != old_gw || + rth->u.dst.dev != dev) + break; + + dst_clone(&rth->u.dst); + + rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (rt == NULL) { + ip_rt_put(rth); + return; + } + + /* + * Copy all the information. + */ + *rt = *rth; + atomic_set(&rt->u.dst.refcnt, 1); + atomic_set(&rt->u.dst.use, 1); + rt->u.dst.lastuse = jiffies; + rt->u.dst.neighbour = NULL; + rt->u.dst.hh = NULL; + rt->u.dst.obsolete = 0; + + rt->rt_flags |= RTCF_REDIRECTED; + + /* Gateway is different ... */ + rt->rt_gateway = new_gw; + + /* Redirect received -> path was valid */ + dst_confirm(&rth->u.dst); + + if (!arp_bind_neighbour(&rt->u.dst) || + !(rt->u.dst.neighbour->nud_state&NUD_VALID)) { + if (rt->u.dst.neighbour) + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rth); + rt_drop(rt); + break; + } + + rt_del(hash, rth); + + if (!rt_intern_hash(hash, rt, &rt)) + ip_rt_put(rt); + break; + } + } + } + return; + +reject_redirect: +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) + printk(KERN_INFO "Redirect from %lX/%s to %lX ignored." + "Path = %lX -> %lX, tos %02x\n", + ntohl(old_gw), dev->name, ntohl(new_gw), + ntohl(saddr), ntohl(daddr), tos); +#endif +} + +static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable*)dst; + + if (rt != NULL) { + if (dst->obsolete) { + ip_rt_put(rt); + return NULL; + } + if ((rt->rt_flags&RTCF_REDIRECTED) || rt->u.dst.expires) { + unsigned hash = rt_hash_code(rt->key.dst, rt->key.src^(rt->key.oif<<5), rt->key.tos); +#if RT_CACHE_DEBUG >= 1 + printk(KERN_DEBUG "ip_rt_advice: redirect to %d.%d.%d.%d/%02x dropped\n", NIPQUAD(rt->rt_dst), rt->key.tos); +#endif + rt_del(hash, rt); + return NULL; + } + } + return dst; +} + +/* + * Algorithm: + * 1. 
The first ip_rt_redirect_number redirects are sent + * with exponential backoff, then we stop sending them at all, + * assuming that the host ignores our redirects. + * 2. If we did not see packets requiring redirects + * during ip_rt_redirect_silence, we assume that the host + * forgot redirected route and start to send redirects again. + * + * This algorithm is much cheaper and more intelligent than dumb load limiting + * in icmp.c. + * + * NOTE. Do not forget to inhibit load limiting for redirects (redundant) + * and "frag. need" (breaks PMTU discovery) in icmp.c. + */ + +void ip_rt_send_redirect(struct sk_buff *skb) +{ + struct rtable *rt = (struct rtable*)skb->dst; + struct in_device *in_dev = (struct in_device*)rt->u.dst.dev->ip_ptr; + + if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) + return; + + /* No redirected packets during ip_rt_redirect_silence; + * reset the algorithm. + */ + if (jiffies - rt->u.dst.rate_last > ip_rt_redirect_silence) + rt->u.dst.rate_tokens = 0; + + /* Too many ignored redirects; do not send anything + * set u.dst.rate_last to the last seen redirected packet. + */ + if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { + rt->u.dst.rate_last = jiffies; + return; + } + + /* Check for load limit; set rate_last to the latest sent + * redirect. + */ + if (jiffies - rt->u.dst.rate_last > (ip_rt_redirect_load<<rt->u.dst.rate_tokens)) { + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + rt->u.dst.rate_last = jiffies; + ++rt->u.dst.rate_tokens; +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev) && + rt->u.dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) + printk(KERN_WARNING "host %08x/if%d ignores redirects for %08x to %08x.\n", + rt->rt_src, rt->rt_iif, rt->rt_dst, rt->rt_gateway); +#endif + } +} + +static int ip_error(struct sk_buff *skb) +{ + struct rtable *rt = (struct rtable*)skb->dst; + unsigned long now; + int code; + + switch (rt->u.dst.error) { + case EINVAL: + default: + kfree_skb(skb); + return 0; + case EHOSTUNREACH: + code = ICMP_HOST_UNREACH; + break; + case ENETUNREACH: + code = ICMP_NET_UNREACH; + break; + case EACCES: + code = ICMP_PKT_FILTERED; + break; + } + + now = jiffies; + if ((rt->u.dst.rate_tokens += (now - rt->u.dst.rate_last)) > ip_rt_error_burst) + rt->u.dst.rate_tokens = ip_rt_error_burst; + rt->u.dst.rate_last = now; + if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { + rt->u.dst.rate_tokens -= ip_rt_error_cost; + icmp_send(skb, ICMP_DEST_UNREACH, code, 0); + } + + kfree_skb(skb); + return 0; +} + +/* + * The last two values are not from the RFC but + * are needed for AMPRnet AX.25 paths. 
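ip_rt_send_redirect() above realises the algorithm from its header comment with two per-destination fields, rate_tokens (redirects already sent) and rate_last (when the last one went out): a quiet period of ip_rt_redirect_silence resets the counter, the required gap doubles with every redirect sent, and after ip_rt_redirect_number of them no more are emitted. A self-contained restatement of that gate; the struct and helper names are illustrative:

/*
 * Exponential-backoff gate modelled on ip_rt_send_redirect().
 * Returns non-zero when a redirect should be emitted now and updates
 * the per-destination state.
 */
struct redirect_state {
    unsigned long rate_last;     /* time of last redirect sent */
    unsigned int  rate_tokens;   /* redirects sent since last reset */
};

static int should_send_redirect(struct redirect_state *st, unsigned long now,
                                unsigned long load, unsigned long silence,
                                unsigned int max_redirects)
{
    if (now - st->rate_last > silence)
        st->rate_tokens = 0;               /* peer was quiet: start over */

    if (st->rate_tokens >= max_redirects) {
        st->rate_last = now;               /* give up, just note the packet */
        return 0;
    }

    if (now - st->rate_last > (load << st->rate_tokens)) {
        st->rate_last = now;               /* spacing doubles each time */
        st->rate_tokens++;
        return 1;
    }
    return 0;
}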
+ */ + +static unsigned short mtu_plateau[] = +{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; + +static __inline__ unsigned short guess_mtu(unsigned short old_mtu) +{ + int i; + + for (i = 0; i < sizeof(mtu_plateau)/sizeof(mtu_plateau[0]); i++) + if (old_mtu > mtu_plateau[i]) + return mtu_plateau[i]; + return 68; +} + +unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) +{ + int i; + unsigned short old_mtu = ntohs(iph->tot_len); + struct rtable *rth; + u32 skeys[2] = { iph->saddr, 0, }; + u32 daddr = iph->daddr; + u8 tos = iph->tos & IPTOS_TOS_MASK; + unsigned short est_mtu = 0; + + if (ipv4_config.no_pmtu_disc) + return 0; + + for (i=0; i<2; i++) { + unsigned hash = rt_hash_code(daddr, skeys[i], tos); + + for (rth = rt_hash_table[hash]; rth; rth = rth->u.rt_next) { + if (rth->key.dst == daddr && + rth->key.src == skeys[i] && + rth->rt_dst == daddr && + rth->rt_src == iph->saddr && + rth->key.tos == tos && + rth->key.iif == 0 && + !(rth->u.dst.mxlock&(1<<RTAX_MTU))) { + unsigned short mtu = new_mtu; + + if (new_mtu < 68 || new_mtu >= old_mtu) { + + /* BSD 4.2 compatibility hack :-( */ + if (mtu == 0 && old_mtu >= rth->u.dst.pmtu && + old_mtu >= 68 + (iph->ihl<<2)) + old_mtu -= iph->ihl<<2; + + mtu = guess_mtu(old_mtu); + } + if (mtu <= rth->u.dst.pmtu) { + if (mtu < rth->u.dst.pmtu) { + dst_confirm(&rth->u.dst); + rth->u.dst.pmtu = mtu; + dst_set_expires(&rth->u.dst, ip_rt_mtu_expires); + } + est_mtu = mtu; + } + } + } + } + return est_mtu ? : new_mtu; +} + +void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu) +{ + if (dst->pmtu > mtu && mtu >= 68 && + !(dst->mxlock&(1<<RTAX_MTU))) { + dst->pmtu = mtu; + dst_set_expires(dst, ip_rt_mtu_expires); + } +} + +static struct dst_entry * ipv4_dst_check(struct dst_entry * dst, u32 cookie) +{ + dst_release(dst); + return NULL; +} + +static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst, + struct sk_buff *skb) +{ + return NULL; +} + +static void ipv4_link_failure(struct sk_buff *skb) +{ + struct rtable *rt; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + + rt = (struct rtable *) skb->dst; + if (rt) + dst_set_expires(&rt->u.dst, 0); +} + +static int ip_rt_bug(struct sk_buff *skb) +{ + printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr, + skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?"); + kfree_skb(skb); + return 0; +} + +/* + We do not cache source address of outgoing interface, + because it is used only by IP RR, TS and SRR options, + so that it out of fast path. + + BTW remember: "addr" is allowed to be not aligned + in IP options! 
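When an ICMP "fragmentation needed" arrives without an MTU value (old routers), ip_rt_frag_needed() falls back on guess_mtu() above, which walks the RFC 1191 plateau table and returns the first plateau strictly below the offending packet's size, bottoming out at 68. The same search, reproduced standalone for illustration:

/* The plateau search of guess_mtu(), copied here for illustration. */
static const unsigned short plateaus[] =
    { 32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

static unsigned short guess_mtu_sketch(unsigned short old_mtu)
{
    unsigned int i;

    for (i = 0; i < sizeof(plateaus) / sizeof(plateaus[0]); i++)
        if (old_mtu > plateaus[i])
            return plateaus[i];      /* first plateau strictly below old_mtu */
    return 68;                       /* minimum IPv4 MTU the stack will use */
}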
+ */ + +void ip_rt_get_source(u8 *addr, struct rtable *rt) +{ + u32 src; + struct fib_result res; + + if (rt->key.iif == 0) + src = rt->rt_src; + else if (fib_lookup(&rt->key, &res) == 0 && res.type != RTN_NAT) + src = FIB_RES_PREFSRC(res); + else + src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); + memcpy(addr, &src, 4); +} + +#ifdef CONFIG_NET_CLS_ROUTE +static void set_class_tag(struct rtable *rt, u32 tag) +{ + if (!(rt->u.dst.tclassid&0xFFFF)) + rt->u.dst.tclassid |= tag&0xFFFF; + if (!(rt->u.dst.tclassid&0xFFFF0000)) + rt->u.dst.tclassid |= tag&0xFFFF0000; +} +#endif + +static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) +{ + struct fib_info *fi = res->fi; + + if (fi) { + if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) + rt->rt_gateway = FIB_RES_GW(*res); + rt->u.dst.mxlock = fi->fib_metrics[RTAX_LOCK-1]; + rt->u.dst.pmtu = fi->fib_mtu; + if (fi->fib_mtu == 0) { + rt->u.dst.pmtu = rt->u.dst.dev->mtu; + if (rt->u.dst.pmtu > IP_MAX_MTU) + rt->u.dst.pmtu = IP_MAX_MTU; + if (rt->u.dst.pmtu < 68) + rt->u.dst.pmtu = 68; + if (rt->u.dst.mxlock&(1<<RTAX_MTU) && + rt->rt_gateway != rt->rt_dst && + rt->u.dst.pmtu > 576) + rt->u.dst.pmtu = 576; + } + rt->u.dst.window= fi->fib_window ? : 0; + rt->u.dst.rtt = fi->fib_rtt ? : TCP_TIMEOUT_INIT; +#ifdef CONFIG_NET_CLS_ROUTE + rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; +#endif + } else { + rt->u.dst.pmtu = rt->u.dst.dev->mtu; + if (rt->u.dst.pmtu > IP_MAX_MTU) + rt->u.dst.pmtu = IP_MAX_MTU; + if (rt->u.dst.pmtu < 68) + rt->u.dst.pmtu = 68; + rt->u.dst.window= 0; + rt->u.dst.rtt = TCP_TIMEOUT_INIT; + } +#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_MULTIPLE_TABLES + set_class_tag(rt, fib_rules_tclass(res)); +#endif + set_class_tag(rt, itag); +#endif + rt->rt_type = res->type; +} + +static int +ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct device *dev, int our) +{ + unsigned hash; + struct rtable *rth; + u32 spec_dst; + struct in_device *in_dev = dev->ip_ptr; + u32 itag = 0; + + /* Primary sanity checks. */ + + if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) || + in_dev == NULL || skb->protocol != __constant_htons(ETH_P_IP)) + return -EINVAL; + + if (ZERONET(saddr)) { + if (!LOCAL_MCAST(daddr)) + return -EINVAL; + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); + } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag) < 0) + return -EINVAL; + + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; + + rth->u.dst.output= ip_rt_bug; + + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = daddr; + rth->rt_dst = daddr; + rth->key.tos = tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark = skb->fwmark; +#endif + rth->key.src = saddr; + rth->rt_src = saddr; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = daddr; + rth->rt_src_map = saddr; +#endif +#ifdef CONFIG_NET_CLS_ROUTE + rth->u.dst.tclassid = itag; +#endif + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = &loopback_dev; + rth->key.oif = 0; + rth->rt_gateway = daddr; + rth->rt_spec_dst= spec_dst; + rth->rt_type = RTN_MULTICAST; + rth->rt_flags = RTCF_MULTICAST; + if (our) { + rth->u.dst.input= ip_local_deliver; + rth->rt_flags |= RTCF_LOCAL; + } + +#ifdef CONFIG_IP_MROUTE + if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) + rth->u.dst.input = ip_mr_input; +#endif + + hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos); + return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); +} + +/* + * NOTE. 
We drop all the packets that has local source + * addresses, because every properly looped back packet + * must have correct destination already attached by output routine. + * + * Such approach solves two big problems: + * 1. Not simplex devices are handled properly. + * 2. IP spoofing attempts are filtered with 100% of guarantee. + */ + +int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct device *dev) +{ + struct rt_key key; + struct fib_result res; + struct in_device *in_dev = dev->ip_ptr; + struct in_device *out_dev; + unsigned flags = 0; + u32 itag = 0; + struct rtable * rth; + unsigned hash; + u32 spec_dst; + int err = -EINVAL; + + /* + * IP on this device is disabled. + */ + + if (!in_dev) + return -EINVAL; + + key.dst = daddr; + key.src = saddr; + key.tos = tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + key.fwmark = skb->fwmark; +#endif + key.iif = dev->ifindex; + key.oif = 0; + key.scope = RT_SCOPE_UNIVERSE; + + hash = rt_hash_code(daddr, saddr^(key.iif<<5), tos); + + /* Check for the most weird martians, which can be not detected + by fib_lookup. + */ + + if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) + goto martian_source; + + if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0)) + goto brd_input; + + /* Accept zero addresses only to limited broadcast; + * I even do not know to fix it or not. Waiting for complains :-) + */ + if (ZERONET(saddr)) + goto martian_source; + + if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) + goto martian_destination; + + /* + * Now we are ready to route packet. + */ + if ((err = fib_lookup(&key, &res))) { + if (!IN_DEV_FORWARD(in_dev)) + return -EINVAL; + goto no_route; + } + +#ifdef CONFIG_IP_ROUTE_NAT + /* Policy is applied before mapping destination, + but rerouting after map should be made with old source. + */ + + if (1) { + u32 src_map = saddr; + if (res.r) + src_map = fib_rules_policy(saddr, &res, &flags); + + if (res.type == RTN_NAT) { + key.dst = fib_rules_map_destination(daddr, &res); + if (fib_lookup(&key, &res) || res.type != RTN_UNICAST) + return -EINVAL; + flags |= RTCF_DNAT; + } + key.src = src_map; + } +#endif + + if (res.type == RTN_BROADCAST) + goto brd_input; + + if (res.type == RTN_LOCAL) { + int result; + result = fib_validate_source(saddr, daddr, tos, loopback_dev.ifindex, + dev, &spec_dst, &itag); + if (result < 0) + goto martian_source; + if (result) + flags |= RTCF_DIRECTSRC; + spec_dst = daddr; + goto local_input; + } + + if (!IN_DEV_FORWARD(in_dev)) + return -EINVAL; + if (res.type != RTN_UNICAST) + goto martian_destination; + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res.fi->fib_nhs > 1 && key.oif == 0) + fib_select_multipath(&key, &res); +#endif + out_dev = FIB_RES_DEV(res)->ip_ptr; + if (out_dev == NULL) { + if (net_ratelimit()) + printk(KERN_CRIT "Bug in ip_route_input_slow(). Please, report\n"); + return -EINVAL; + } + + err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst, &itag); + if (err < 0) + goto martian_source; + + if (err) + flags |= RTCF_DIRECTSRC; + + if (out_dev == in_dev && err && !(flags&(RTCF_NAT|RTCF_MASQ)) && + (IN_DEV_SHARED_MEDIA(out_dev) + || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res)))) + flags |= RTCF_DOREDIRECT; + + if (skb->protocol != __constant_htons(ETH_P_IP)) { + /* Not IP (i.e. ARP). Do not create route, if it is + * invalid for proxy arp. DNAT routes are always valid. 
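Before any of the forwarding logic above runs, ip_route_input_slow() screens out "martian" packets as RFC 1812 requires: multicast, class E or loopback source addresses are rejected, zero sources are only tolerated for the limited broadcast, and class E, zero-net or loopback destinations are refused outright. A condensed version of that screen, with the kernel's MULTICAST/BADCLASS/ZERONET/LOOPBACK macros replaced by host-byte-order stand-ins:

#include <stdint.h>

/* Host-order address class tests (stand-ins for the kernel macros). */
static int is_multicast(uint32_t a) { return (a & 0xF0000000u) == 0xE0000000u; }
static int is_badclass(uint32_t a)  { return (a & 0xF0000000u) == 0xF0000000u; }
static int is_zeronet(uint32_t a)   { return (a & 0xFF000000u) == 0; }
static int is_loopback(uint32_t a)  { return (a & 0xFF000000u) == 0x7F000000u; }

enum verdict { MARTIAN_SRC, MARTIAN_DST, BROADCAST_INPUT, ROUTE_NORMALLY };

/* Mirrors the early checks in ip_route_input_slow(), host byte order. */
static enum verdict screen(uint32_t saddr, uint32_t daddr)
{
    if (is_multicast(saddr) || is_badclass(saddr) || is_loopback(saddr))
        return MARTIAN_SRC;
    if (daddr == 0xFFFFFFFFu || (saddr == 0 && daddr == 0))
        return BROADCAST_INPUT;
    if (is_zeronet(saddr))
        return MARTIAN_SRC;        /* zero source only valid for limited bcast */
    if (is_badclass(daddr) || is_zeronet(daddr) || is_loopback(daddr))
        return MARTIAN_DST;
    return ROUTE_NORMALLY;
}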
+ */ + if (out_dev == in_dev && !(flags&RTCF_DNAT)) + return -EINVAL; + } + + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; + + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = daddr; + rth->rt_dst = daddr; + rth->key.tos = tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark = skb->fwmark; +#endif + rth->key.src = saddr; + rth->rt_src = saddr; + rth->rt_gateway = daddr; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_src_map = key.src; + rth->rt_dst_map = key.dst; + if (flags&RTCF_DNAT) + rth->rt_gateway = key.dst; +#endif + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = out_dev->dev; + rth->key.oif = 0; + rth->rt_spec_dst= spec_dst; + + rth->u.dst.input = ip_forward; + rth->u.dst.output = ip_output; + + rt_set_nexthop(rth, &res, itag); + + rth->rt_flags = flags; + +#ifdef CONFIG_NET_FASTROUTE + if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) { + struct device *odev = rth->u.dst.dev; + if (odev != dev && + dev->accept_fastpath && + odev->mtu >= dev->mtu && + dev->accept_fastpath(dev, &rth->u.dst) == 0) + rth->rt_flags |= RTCF_FAST; + } +#endif + + return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + +brd_input: + if (skb->protocol != __constant_htons(ETH_P_IP)) + return -EINVAL; + + if (ZERONET(saddr)) { + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); + } else { + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag); + if (err < 0) + goto martian_source; + if (err) + flags |= RTCF_DIRECTSRC; + } + flags |= RTCF_BROADCAST; + res.type = RTN_BROADCAST; + +local_input: + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; + + rth->u.dst.output= ip_rt_bug; + + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = daddr; + rth->rt_dst = daddr; + rth->key.tos = tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark = skb->fwmark; +#endif + rth->key.src = saddr; + rth->rt_src = saddr; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = key.dst; + rth->rt_src_map = key.src; +#endif +#ifdef CONFIG_NET_CLS_ROUTE + rth->u.dst.tclassid = itag; +#endif + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = &loopback_dev; + rth->key.oif = 0; + rth->rt_gateway = daddr; + rth->rt_spec_dst= spec_dst; + rth->u.dst.input= ip_local_deliver; + rth->rt_flags = flags|RTCF_LOCAL; + if (res.type == RTN_UNREACHABLE) { + rth->u.dst.input= ip_error; + rth->u.dst.error= -err; + rth->rt_flags &= ~RTCF_LOCAL; + } + rth->rt_type = res.type; + return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + +no_route: + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + res.type = RTN_UNREACHABLE; + goto local_input; + + /* + * Do not cache martian addresses: they should be logged (RFC1812) + */ +martian_destination: +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) + printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name); +#endif + return -EINVAL; + +martian_source: +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { + /* + * RFC1812 recommenadtion, if source is martian, + * the only hint is MAC header. 
+ */ + printk(KERN_WARNING "martian source %08x for %08x, dev %s\n", saddr, daddr, dev->name); + if (dev->hard_header_len) { + int i; + unsigned char *p = skb->mac.raw; + printk(KERN_WARNING "ll header:"); + for (i=0; i<dev->hard_header_len; i++, p++) + printk(" %02x", *p); + printk("\n"); + } + } +#endif + return -EINVAL; +} + +int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct device *dev) +{ + struct rtable * rth; + unsigned hash; + int iif = dev->ifindex; + + tos &= IPTOS_TOS_MASK; + hash = rt_hash_code(daddr, saddr^(iif<<5), tos); + + for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { + if (rth->key.dst == daddr && + rth->key.src == saddr && + rth->key.iif == iif && + rth->key.oif == 0 && +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark == skb->fwmark && +#endif + rth->key.tos == tos) { + rth->u.dst.lastuse = jiffies; + atomic_inc(&rth->u.dst.use); + atomic_inc(&rth->u.dst.refcnt); + skb->dst = (struct dst_entry*)rth; + return 0; + } + } + + /* Multicast recognition logic is moved from route cache to here. + The problem was that too many Ethernet cards have broken/missing + hardware multicast filters :-( As result the host on multicasting + network acquires a lot of useless route cache entries, sort of + SDR messages from all the world. Now we try to get rid of them. + Really, provided software IP multicast filter is organized + reasonably (at least, hashed), it does not result in a slowdown + comparing with route cache reject entries. + Note, that multicast routers are not affected, because + route cache entry is created eventually. + */ + if (MULTICAST(daddr)) { + int our = ip_check_mc(dev, daddr); + if (!our +#ifdef CONFIG_IP_MROUTE + && (LOCAL_MCAST(daddr) || !dev->ip_ptr || + !IN_DEV_MFORWARD((struct in_device*)dev->ip_ptr)) +#endif + ) return -EINVAL; + return ip_route_input_mc(skb, daddr, saddr, tos, dev, our); + } + return ip_route_input_slow(skb, daddr, saddr, tos, dev); +} + +/* + * Major route resolver routine. + */ + +int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) +{ + struct rt_key key; + struct fib_result res; + unsigned flags = 0; + struct rtable *rth; + struct device *dev_out = NULL; + unsigned hash; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + u32 nochecksrc = (tos & RTO_TPROXY); +#endif + + tos &= IPTOS_TOS_MASK|RTO_ONLINK; + key.dst = daddr; + key.src = saddr; + key.tos = tos&IPTOS_TOS_MASK; + key.iif = loopback_dev.ifindex; + key.oif = oif; + key.scope = (tos&RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + res.fi = NULL; +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif + + if (saddr) { + if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr)) + return -EINVAL; + + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(saddr); +#ifdef CONFIG_IP_TRANSPARENT_PROXY + /* If address is not local, test for transparent proxy flag; + if address is local --- clear the flag. + */ + if (dev_out == NULL) { + if (nochecksrc == 0 || inet_addr_type(saddr) != RTN_UNICAST) + return -EINVAL; + flags |= RTCF_TPROXY; + } +#else + if (dev_out == NULL) + return -EINVAL; +#endif + + /* I removed check for oif == dev_out->oif here. + It was wrong by three reasons: + 1. ip_dev_find(saddr) can return wrong iface, if saddr is + assigned to multiple interfaces. + 2. Moreover, we are allowed to send packets with saddr + of another iface. 
--ANK + */ + + if (oif == 0 && +#ifdef CONFIG_IP_TRANSPARENT_PROXY + dev_out && +#endif + (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) { + /* Special hack: user can direct multicasts + and limited broadcast via necessary interface + without fiddling with IP_MULTICAST_IF or IP_PKTINFO. + This hack is not just for fun, it allows + vic,vat and friends to work. + They bind socket to loopback, set ttl to zero + and expect that it will work. + From the viewpoint of routing cache they are broken, + because we are not allowed to build multicast path + with loopback source addr (look, routing cache + cannot know, that ttl is zero, so that packet + will not leave this host and route is valid). + Luckily, this hack is good workaround. + */ + + key.oif = dev_out->ifindex; + goto make_route; + } + dev_out = NULL; + } + if (oif) { + dev_out = dev_get_by_index(oif); + if (dev_out == NULL) + return -ENODEV; + if (dev_out->ip_ptr == NULL) + return -ENODEV; /* Wrong error code */ + + if (LOCAL_MCAST(daddr) || daddr == 0xFFFFFFFF) { + if (!key.src) + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); + goto make_route; + } + if (!key.src) { + if (MULTICAST(daddr)) + key.src = inet_select_addr(dev_out, 0, key.scope); + else if (!daddr) + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); + } + } + + if (!key.dst) { + key.dst = key.src; + if (!key.dst) + key.dst = key.src = htonl(INADDR_LOOPBACK); + dev_out = &loopback_dev; + key.oif = loopback_dev.ifindex; + res.type = RTN_LOCAL; + flags |= RTCF_LOCAL; + goto make_route; + } + + if (fib_lookup(&key, &res)) { + res.fi = NULL; + if (oif) { + /* Apparently, routing tables are wrong. Assume, + that the destination is on link. + + WHY? DW. + Because we are allowed to send to iface + even if it has NO routes and NO assigned + addresses. When oif is specified, routing + tables are looked up with only one purpose: + to catch if destination is gatewayed, rather than + direct. Moreover, if MSG_DONTROUTE is set, + we send packet, ignoring both routing tables + and ifaddr state. --ANK + + + We could make it even if oif is unknown, + likely IPv6, but we do not. + */ + + if (key.src == 0) + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); + res.type = RTN_UNICAST; + goto make_route; + } + return -ENETUNREACH; + } + + if (res.type == RTN_NAT) + return -EINVAL; + + if (res.type == RTN_LOCAL) { + if (!key.src) + key.src = key.dst; + dev_out = &loopback_dev; + key.oif = dev_out->ifindex; + res.fi = NULL; + flags |= RTCF_LOCAL; + goto make_route; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res.fi->fib_nhs > 1 && key.oif == 0) + fib_select_multipath(&key, &res); + else +#endif + if (res.prefixlen==0 && res.type == RTN_UNICAST && key.oif == 0) + fib_select_default(&key, &res); + + if (!key.src) + key.src = FIB_RES_PREFSRC(res); + + dev_out = FIB_RES_DEV(res); + key.oif = dev_out->ifindex; + +make_route: + if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) + return -EINVAL; + + if (key.dst == 0xFFFFFFFF) + res.type = RTN_BROADCAST; + else if (MULTICAST(key.dst)) + res.type = RTN_MULTICAST; + else if (BADCLASS(key.dst) || ZERONET(key.dst)) + return -EINVAL; + + if (dev_out->flags&IFF_LOOPBACK) + flags |= RTCF_LOCAL; + + if (res.type == RTN_BROADCAST) { + flags |= RTCF_BROADCAST|RTCF_LOCAL; + res.fi = NULL; + } else if (res.type == RTN_MULTICAST) { + flags |= RTCF_MULTICAST|RTCF_LOCAL; + if (!ip_check_mc(dev_out, daddr)) + flags &= ~RTCF_LOCAL; + /* If multicast route do not exist use + default one, but do not gateway in this case. + Yes, it is hack. 
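Once ip_route_output_slow() has settled on a source address and an output device, the make_route: section classifies the destination before the cache entry is filled in: limited broadcast first, then multicast, then the reserved classes are rejected, and a loopback device additionally forces the LOCAL flag. A compact restatement of just that classification; the address tests are simplified stand-ins for the kernel macros:

#include <stdint.h>

enum out_type { OUT_BROADCAST, OUT_MULTICAST, OUT_UNICAST, OUT_INVALID };

/*
 * Destination classification performed at make_route: in
 * ip_route_output_slow(), host byte order.
 */
static enum out_type classify_output(uint32_t dst)
{
    if (dst == 0xFFFFFFFFu)
        return OUT_BROADCAST;                       /* limited broadcast */
    if ((dst & 0xF0000000u) == 0xE0000000u)
        return OUT_MULTICAST;                       /* class D */
    if ((dst & 0xF0000000u) == 0xF0000000u ||       /* class E */
        (dst & 0xFF000000u) == 0)                   /* zero network */
        return OUT_INVALID;
    return OUT_UNICAST;
}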
+ */ + if (res.fi && res.prefixlen < 4) + res.fi = NULL; + } + + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; + + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = daddr; + rth->key.tos = tos; + rth->key.src = saddr; + rth->key.iif = 0; + rth->key.oif = oif; + rth->rt_dst = key.dst; + rth->rt_src = key.src; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = key.dst; + rth->rt_src_map = key.src; +#endif + rth->rt_iif = oif ? : dev_out->ifindex; + rth->u.dst.dev = dev_out; + rth->rt_gateway = key.dst; + rth->rt_spec_dst= key.src; + + rth->u.dst.output=ip_output; + + if (flags&RTCF_LOCAL) { + rth->u.dst.input = ip_local_deliver; + rth->rt_spec_dst = key.dst; + } + if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { + rth->rt_spec_dst = key.src; + if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) + rth->u.dst.output = ip_mc_output; +#ifdef CONFIG_IP_MROUTE + if (res.type == RTN_MULTICAST && dev_out->ip_ptr) { + struct in_device *in_dev = dev_out->ip_ptr; + if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) { + rth->u.dst.input = ip_mr_input; + rth->u.dst.output = ip_mc_output; + } + } +#endif + } + + rt_set_nexthop(rth, &res, 0); + + rth->rt_flags = flags; + + hash = rt_hash_code(daddr, saddr^(oif<<5), tos); + return rt_intern_hash(hash, rth, rp); +} + +int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) +{ + unsigned hash; + struct rtable *rth; + + hash = rt_hash_code(daddr, saddr^(oif<<5), tos); + + start_bh_atomic(); + for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { + if (rth->key.dst == daddr && + rth->key.src == saddr && + rth->key.iif == 0 && + rth->key.oif == oif && +#ifndef CONFIG_IP_TRANSPARENT_PROXY + rth->key.tos == tos +#else + !((rth->key.tos^tos)&(IPTOS_TOS_MASK|RTO_ONLINK)) && + ((tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY)) +#endif + ) { + rth->u.dst.lastuse = jiffies; + atomic_inc(&rth->u.dst.use); + atomic_inc(&rth->u.dst.refcnt); + end_bh_atomic(); + *rp = rth; + return 0; + } + } + end_bh_atomic(); + + return ip_route_output_slow(rp, daddr, saddr, tos, oif); +} + +#ifdef CONFIG_RTNETLINK + +static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait) +{ + struct rtable *rt = (struct rtable*)skb->dst; + struct rtmsg *r; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + struct rta_cacheinfo ci; +#ifdef CONFIG_IP_MROUTE + struct rtattr *eptr; +#endif + struct rtattr *mx; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); + r = NLMSG_DATA(nlh); + nlh->nlmsg_flags = (nowait && pid) ? 
NLM_F_MULTI : 0; + r->rtm_family = AF_INET; + r->rtm_dst_len = 32; + r->rtm_src_len = 0; + r->rtm_tos = rt->key.tos; + r->rtm_table = RT_TABLE_MAIN; + r->rtm_type = rt->rt_type; + r->rtm_scope = RT_SCOPE_UNIVERSE; + r->rtm_protocol = RTPROT_UNSPEC; + r->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED; + if (rt->rt_flags & RTCF_NOTIFY) + r->rtm_flags |= RTM_F_NOTIFY; + RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); + if (rt->key.src) { + r->rtm_src_len = 32; + RTA_PUT(skb, RTA_SRC, 4, &rt->key.src); + } + if (rt->u.dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); +#ifdef CONFIG_NET_CLS_ROUTE + if (rt->u.dst.tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); +#endif + if (rt->key.iif) + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + else if (rt->rt_src != rt->key.src) + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); + if (rt->rt_dst != rt->rt_gateway) + RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); + mx = (struct rtattr*)skb->tail; + RTA_PUT(skb, RTA_METRICS, 0, NULL); + if (rt->u.dst.mxlock) + RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock); + if (rt->u.dst.pmtu) + RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu); + if (rt->u.dst.window) + RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window); + if (rt->u.dst.rtt) + RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt); + mx->rta_len = skb->tail - (u8*)mx; + if (mx->rta_len == RTA_LENGTH(0)) + skb_trim(skb, (u8*)mx - skb->data); + ci.rta_lastuse = jiffies - rt->u.dst.lastuse; + ci.rta_used = atomic_read(&rt->u.dst.refcnt); + ci.rta_clntref = atomic_read(&rt->u.dst.use); + if (rt->u.dst.expires) + ci.rta_expires = rt->u.dst.expires - jiffies; + else + ci.rta_expires = 0; + ci.rta_error = rt->u.dst.error; +#ifdef CONFIG_IP_MROUTE + eptr = (struct rtattr*)skb->tail; +#endif + RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rt->key.iif) { +#ifdef CONFIG_IP_MROUTE + u32 dst = rt->rt_dst; + + if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) { + int err = ipmr_get_route(skb, r, nowait); + if (err <= 0) { + if (!nowait) { + if (err == 0) + return 0; + goto nlmsg_failure; + } else { + if (err == -EMSGSIZE) + goto nlmsg_failure; + ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; + } + } + } else +#endif + { + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif); + } + } + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtable *rt = NULL; + u32 dst = 0; + u32 src = 0; + int iif = 0; + int err; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOBUFS; + + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. 
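+	   Reserving MAX_HEADER plus an IP header below means the routing
+	   code can treat this locally built skb like a freshly received
+	   packet when an input interface (RTA_IIF) is supplied and
+	   ip_route_input() is used for the lookup.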
+ */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + + if (rta[RTA_SRC-1]) + memcpy(&src, RTA_DATA(rta[RTA_SRC-1]), 4); + if (rta[RTA_DST-1]) + memcpy(&dst, RTA_DATA(rta[RTA_DST-1]), 4); + if (rta[RTA_IIF-1]) + memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + + if (iif) { + struct device *dev; + dev = dev_get_by_index(iif); + if (!dev) + return -ENODEV; + skb->protocol = __constant_htons(ETH_P_IP); + skb->dev = dev; + start_bh_atomic(); + err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); + end_bh_atomic(); + rt = (struct rtable*)skb->dst; + if (!err && rt->u.dst.error) + err = -rt->u.dst.error; + } else { + int oif = 0; + if (rta[RTA_OIF-1]) + memcpy(&oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif); + } + if (err) { + kfree_skb(skb); + return err; + } + + skb->dst = &rt->u.dst; + if (rtm->rtm_flags & RTM_F_NOTIFY) + rt->rt_flags |= RTCF_NOTIFY; + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + + err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); + if (err == 0) + return 0; + if (err < 0) + return -EMSGSIZE; + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err < 0) + return err; + return 0; +} + + +int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtable *rt; + int h, s_h; + int idx, s_idx; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + for (h=0; h < RT_HASH_DIVISOR; h++) { + if (h < s_h) continue; + if (h > s_h) + s_idx = 0; + start_bh_atomic(); + for (rt = rt_hash_table[h], idx = 0; rt; rt = rt->u.rt_next, idx++) { + if (idx < s_idx) + continue; + skb->dst = dst_clone(&rt->u.dst); + if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) { + dst_release(xchg(&skb->dst, NULL)); + end_bh_atomic(); + goto done; + } + dst_release(xchg(&skb->dst, NULL)); + } + end_bh_atomic(); + } + +done: + cb->args[0] = h; + cb->args[1] = idx; + return skb->len; +} + +#endif /* CONFIG_RTNETLINK */ + +void ip_rt_multicast_event(struct in_device *in_dev) +{ + rt_cache_flush(0); +} + + + +#ifdef CONFIG_SYSCTL + +static int flush_delay; + +static +int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + if (write) { + proc_dointvec(ctl, write, filp, buffer, lenp); + rt_cache_flush(flush_delay); + return 0; + } else + return -EINVAL; +} + +static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, + void **context) +{ + int delay; + if (newlen != sizeof(int)) + return -EINVAL; + if (get_user(delay,(int *)newval)) + return -EFAULT; + rt_cache_flush(delay); + return 0; +} + +ctl_table ipv4_route_table[] = { + {NET_IPV4_ROUTE_FLUSH, "flush", + &flush_delay, sizeof(int), 0644, NULL, + &ipv4_sysctl_rtcache_flush, &ipv4_sysctl_rtcache_flush_strategy }, + {NET_IPV4_ROUTE_MIN_DELAY, "min_delay", + &ip_rt_min_delay, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_ROUTE_MAX_DELAY, "max_delay", + &ip_rt_max_delay, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_ROUTE_GC_THRESH, "gc_thresh", + &ipv4_dst_ops.gc_thresh, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_MAX_SIZE, "max_size", + &ip_rt_max_size, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval", + &ip_rt_gc_min_interval, sizeof(int), 0644, NULL, + 
&proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout", + &ip_rt_gc_timeout, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval", + &ip_rt_gc_interval, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load", + &ip_rt_redirect_load, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number", + &ip_rt_redirect_number, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence", + &ip_rt_redirect_silence, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_ERROR_COST, "error_cost", + &ip_rt_error_cost, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_ERROR_BURST, "error_burst", + &ip_rt_error_burst, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity", + &ip_rt_gc_elasticity, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires", + &ip_rt_mtu_expires, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {0} +}; +#endif + +#ifdef CONFIG_NET_CLS_ROUTE +struct ip_rt_acct ip_rt_acct[256]; + +#ifdef CONFIG_PROC_FS +static int ip_rt_acct_read(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + *start=buffer; + + if (offset + length > sizeof(ip_rt_acct)) { + length = sizeof(ip_rt_acct) - offset; + *eof = 1; + } + if (length > 0) { + start_bh_atomic(); + memcpy(buffer, ((u8*)&ip_rt_acct)+offset, length); + end_bh_atomic(); + return length; + } + return 0; +} +#endif +#endif + + +__initfunc(void ip_rt_init(void)) +{ +#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NET_CLS_ROUTE + struct proc_dir_entry *ent; +#endif +#endif + devinet_init(); + ip_fib_init(); + rt_periodic_timer.function = rt_check_expire; + /* All the timers, started at system startup tend + to synchronize. Perturb it a bit. + */ + rt_periodic_timer.expires = jiffies + net_random()%ip_rt_gc_interval + + ip_rt_gc_interval; + add_timer(&rt_periodic_timer); + +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_RTCACHE, 8, "rt_cache", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rt_cache_get_info + }); +#ifdef CONFIG_NET_CLS_ROUTE + ent = create_proc_entry("net/rt_acct", 0, 0); + ent->read_proc = ip_rt_acct_read; +#endif +#endif +} diff --git a/pfinet/linux-src/net/ipv4/syncookies.c b/pfinet/linux-src/net/ipv4/syncookies.c new file mode 100644 index 00000000..fb4e8f80 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/syncookies.c @@ -0,0 +1,201 @@ +/* + * Syncookies implementation for the Linux kernel + * + * Copyright (C) 1997 Andi Kleen + * Based on ideas by D.J.Bernstein and Eric Schenk. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * $Id: syncookies.c,v 1.7.2.1 1999/08/08 08:43:13 davem Exp $ + * + * Missing: IPv6 support. + */ + +#include <linux/config.h> +#if defined(CONFIG_SYN_COOKIES) +#include <linux/tcp.h> +#include <linux/malloc.h> +#include <linux/random.h> +#include <net/tcp.h> + +extern int sysctl_tcp_syncookies; + +static unsigned long tcp_lastsynq_overflow; + +/* + * This table has to be sorted and terminated with (__u16)-1. + * XXX generate a better table. 
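+ *
+ * For illustration: the entries hold MSS-1, cookie_v4_init_sequence()
+ * scans for the largest value not above the client's MSS (the 64-byte
+ * entry acting as a floor), and only the resulting index is folded into
+ * the cookie.  A SYN advertising an MSS of 1400 stops at 1024-1
+ * (index 4), so the cookie encodes 4 and the connection proceeds with
+ * an MSS of 1024.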
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + */ +static __u16 const msstab[] = { + 64-1, + 256-1, + 512-1, + 536-1, + 1024-1, + 1440-1, + 1460-1, + 4312-1, + (__u16)-1 +}; +/* The number doesn't include the -1 terminator */ +#define NUM_MSS (sizeof(msstab)/sizeof(msstab[0]) - 1) + +/* + * Generate a syncookie. mssp points to the mss, which is returned + * rounded down to the value encoded in the cookie. + */ +__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, + __u16 *mssp) +{ + int mssind; + const __u16 mss = *mssp; + + tcp_lastsynq_overflow = jiffies; + /* XXX sort msstab[] by probability? Binary search? */ + for (mssind = 0; mss > msstab[mssind+1]; mssind++) + ; + *mssp = msstab[mssind]+1; + + net_statistics.SyncookiesSent++; + + return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->h.th->source, skb->h.th->dest, + ntohl(skb->h.th->seq), + jiffies / (HZ*60), mssind); +} + +/* + * This (misnamed) value is the age of syncookie which is permitted. + * Its ideal value should be dependent on TCP_TIMEOUT_INIT and + * sysctl_tcp_retries1. It's a rather complicated formula (exponential + * backoff) to compute at runtime so it's currently hardcoded here. + */ +#define COUNTER_TRIES 4 +/* + * Check if a ack sequence number is a valid syncookie. + * Return the decoded mss if it is, or 0 if not. + */ +static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +{ + __u32 seq; + __u32 mssind; + + if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT) + return 0; + + seq = ntohl(skb->h.th->seq)-1; + mssind = check_tcp_syn_cookie(cookie, + skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->h.th->source, skb->h.th->dest, + seq, jiffies/(HZ*60), COUNTER_TRIES); + + return mssind < NUM_MSS ? msstab[mssind]+1 : 0; +} + +extern struct or_calltable or_ipv4; + +static inline struct sock * +get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req, + struct dst_entry *dst) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, dst); + req->sk = sk; + + /* Queue up for accept() */ + tcp_synq_queue(tp, req); + + return sk; +} + +struct sock * +cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) +{ + __u32 cookie = ntohl(skb->h.th->ack_seq)-1; + struct open_request *req; + int mss; + struct rtable *rt; + __u8 rcv_wscale; + + if (!sysctl_tcp_syncookies) + return sk; + if (!skb->h.th->ack) + return sk; + + mss = cookie_check(skb, cookie); + if (mss == 0) { + net_statistics.SyncookiesFailed++; + return sk; + } + + net_statistics.SyncookiesRecv++; + + req = tcp_openreq_alloc(); + if (req == NULL) + return NULL; + + req->rcv_isn = htonl(skb->h.th->seq)-1; + req->snt_isn = cookie; + req->mss = mss; + req->rmt_port = skb->h.th->source; + req->af.v4_req.loc_addr = skb->nh.iph->daddr; + req->af.v4_req.rmt_addr = skb->nh.iph->saddr; + req->class = &or_ipv4; /* for safety */ +#ifdef CONFIG_IP_TRANSPARENT_PROXY + req->lcl_port = skb->h.th->dest; +#endif + + req->af.v4_req.opt = NULL; + + /* We throwed the options of the initial SYN away, so we hope + * the ACK carries the same options again (see RFC1122 4.2.3.8) + */ + if (opt && opt->optlen) { + int opt_size = sizeof(struct ip_options) + opt->optlen; + + req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC); + if (req->af.v4_req.opt) { + if (ip_options_echo(req->af.v4_req.opt, skb)) { + kfree_s(req->af.v4_req.opt, opt_size); + req->af.v4_req.opt = NULL; + } + } + } + + req->snd_wscale = req->rcv_wscale = 
req->tstamp_ok = 0; + req->wscale_ok = 0; + req->expires = 0UL; + req->retrans = 0; + + /* + * We need to lookup the route here to get at the correct + * window size. We should better make sure that the window size + * hasn't changed since we received the original syn, but I see + * no easy way to do this. + */ + if (ip_route_output(&rt, + opt && + opt->srr ? opt->faddr : req->af.v4_req.rmt_addr, + req->af.v4_req.loc_addr, + sk->ip_tos | RTO_CONN, + 0)) { + tcp_openreq_free(req); + return NULL; + } + + /* Try to redo what tcp_v4_send_synack did. */ + req->window_clamp = rt->u.dst.window; + tcp_select_initial_window(sock_rspace(sk)/2,req->mss, + &req->rcv_wnd, &req->window_clamp, + 0, &rcv_wscale); + req->rcv_wscale = rcv_wscale; + + return get_cookie_sock(sk, skb, req, &rt->u.dst); +} + +#endif diff --git a/pfinet/linux-src/net/ipv4/sysctl_net_ipv4.c b/pfinet/linux-src/net/ipv4/sysctl_net_ipv4.c new file mode 100644 index 00000000..e578e4e7 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/sysctl_net_ipv4.c @@ -0,0 +1,205 @@ +/* + * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. + * + * $Id: sysctl_net_ipv4.c,v 1.38.2.1 1999/08/08 08:43:14 davem Exp $ + * + * Begun April 1, 1996, Mike Shaver. + * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/config.h> +#include <net/snmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/tcp.h> + +/* + * TCP configuration parameters + */ + +#define TCP_PMTU_DISC 0x00000001 /* perform PMTU discovery */ +#define TCP_CONG_AVOID 0x00000002 /* congestion avoidance algorithm */ +#define TCP_DELAY_ACKS 0x00000003 /* delayed ack stategy */ + +#if 0 +static int boolean_min = 0; +static int boolean_max = 1; +#endif + +/* From icmp.c */ +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; + +/* From ip_fragment.c */ +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_time; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +/* From ip_masq.c */ +extern int sysctl_ip_masq_debug; + +extern int sysctl_tcp_timestamps; +extern int sysctl_tcp_window_scaling; +extern int sysctl_tcp_sack; +extern int sysctl_tcp_retrans_collapse; +extern int sysctl_tcp_keepalive_time; +extern int sysctl_tcp_keepalive_probes; +extern int sysctl_tcp_max_ka_probes; +extern int sysctl_tcp_retries1; +extern int sysctl_tcp_retries2; +extern int sysctl_tcp_fin_timeout; +extern int sysctl_tcp_syncookies; +extern int sysctl_tcp_syn_retries; +extern int sysctl_tcp_stdurg; +extern int sysctl_tcp_rfc1337; +extern int sysctl_tcp_syn_taildrop; +extern int sysctl_max_syn_backlog; + +/* From icmp.c */ +extern int sysctl_icmp_destunreach_time; +extern int sysctl_icmp_timeexceed_time; +extern int sysctl_icmp_paramprob_time; +extern int sysctl_icmp_echoreply_time; + +/* From igmp.c */ +extern int sysctl_igmp_max_memberships; + +int tcp_retr1_max = 255; + +struct ipv4_config ipv4_config; + +extern ctl_table ipv4_route_table[]; + +#ifdef CONFIG_SYSCTL + +static +int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + int val = ipv4_devconf.forwarding; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp); + + if (write && ipv4_devconf.forwarding != val) + inet_forward_change(); + + return ret; +} + +static int ipv4_sysctl_forward_strategy(ctl_table *table, int *name, int nlen, + void *oldval, size_t 
*oldlenp, + void *newval, size_t newlen, + void **context) +{ + int new; + if (newlen != sizeof(int)) + return -EINVAL; + if (get_user(new,(int *)newval)) + return -EFAULT; + if (new != ipv4_devconf.forwarding) + inet_forward_change(); + return 0; /* caller does change again and handles handles oldval */ +} + +ctl_table ipv4_table[] = { + {NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps", + &sysctl_tcp_timestamps, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling", + &sysctl_tcp_window_scaling, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_TCP_SACK, "tcp_sack", + &sysctl_tcp_sack, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse", + &sysctl_tcp_retrans_collapse, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_FORWARD, "ip_forward", + &ipv4_devconf.forwarding, sizeof(int), 0644, NULL, + &ipv4_sysctl_forward,&ipv4_sysctl_forward_strategy}, + {NET_IPV4_DEFAULT_TTL, "ip_default_ttl", + &ip_statistics.IpDefaultTTL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_AUTOCONFIG, "ip_autoconfig", + &ipv4_config.autoconfig, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc", + &ipv4_config.no_pmtu_disc, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries", + &sysctl_tcp_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh", + &sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh", + &sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_DYNADDR, "ip_dynaddr", + &sysctl_ip_dynaddr, sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_IP_MASQUERADE + {NET_IPV4_IP_MASQ_DEBUG, "ip_masq_debug", + &sysctl_ip_masq_debug, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + {NET_IPV4_IPFRAG_TIME, "ipfrag_time", + &sysctl_ipfrag_time, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, + &sysctl_jiffies}, + {NET_IPV4_TCP_MAX_KA_PROBES, "tcp_max_ka_probes", + &sysctl_tcp_max_ka_probes, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time", + &sysctl_tcp_keepalive_time, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, + {NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes", + &sysctl_tcp_keepalive_probes, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_TCP_RETRIES1, "tcp_retries1", + &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, NULL, &tcp_retr1_max}, + {NET_IPV4_TCP_RETRIES2, "tcp_retries2", + &sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout", + &sysctl_tcp_fin_timeout, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies, &sysctl_jiffies}, +#ifdef CONFIG_SYN_COOKIES + {NET_TCP_SYNCOOKIES, "tcp_syncookies", + &sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + {NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg, + sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_RFC1337, "tcp_rfc1337", &sysctl_tcp_rfc1337, + sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog, + sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range", + &sysctl_local_port_range, sizeof(sysctl_local_port_range), 0644, + NULL, &proc_dointvec}, + {NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all", + 
&sysctl_icmp_echo_ignore_all, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts", + &sysctl_icmp_echo_ignore_broadcasts, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses", + &sysctl_icmp_ignore_bogus_error_responses, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ICMP_DESTUNREACH_RATE, "icmp_destunreach_rate", + &sysctl_icmp_destunreach_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_TIMEEXCEED_RATE, "icmp_timeexceed_rate", + &sysctl_icmp_timeexceed_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_PARAMPROB_RATE, "icmp_paramprob_rate", + &sysctl_icmp_paramprob_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate", + &sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table}, +#ifdef CONFIG_IP_MULTICAST + {NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships", + &sysctl_igmp_max_memberships, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + {0} +}; + +#endif /* CONFIG_SYSCTL */ diff --git a/pfinet/linux-src/net/ipv4/tcp.c b/pfinet/linux-src/net/ipv4/tcp.c new file mode 100644 index 00000000..65763215 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/tcp.c @@ -0,0 +1,1826 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp.c,v 1.140.2.4 1999/08/09 03:13:12 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche, <flla@stud.uni-sb.de> + * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> + * Linus Torvalds, <torvalds@cs.helsinki.fi> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Matthew Dillon, <dillon@apollo.west.oic.com> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Jorge Cwik, <jorge@laser.satlink.net> + * + * Fixes: + * Alan Cox : Numerous verify_area() calls + * Alan Cox : Set the ACK bit on a reset + * Alan Cox : Stopped it crashing if it closed while + * sk->inuse=1 and was trying to connect + * (tcp_err()). + * Alan Cox : All icmp error handling was broken + * pointers passed where wrong and the + * socket was looked up backwards. Nobody + * tested any icmp error code obviously. + * Alan Cox : tcp_err() now handled properly. It + * wakes people on errors. poll + * behaves and the icmp error race + * has gone by moving it into sock.c + * Alan Cox : tcp_send_reset() fixed to work for + * everything not just packets for + * unknown sockets. + * Alan Cox : tcp option processing. + * Alan Cox : Reset tweaked (still not 100%) [Had + * syn rule wrong] + * Herp Rosmanith : More reset fixes + * Alan Cox : No longer acks invalid rst frames. + * Acking any kind of RST is right out. + * Alan Cox : Sets an ignore me flag on an rst + * receive otherwise odd bits of prattle + * escape still + * Alan Cox : Fixed another acking RST frame bug. + * Should stop LAN workplace lockups. 
+ * Alan Cox : Some tidyups using the new skb list + * facilities + * Alan Cox : sk->keepopen now seems to work + * Alan Cox : Pulls options out correctly on accepts + * Alan Cox : Fixed assorted sk->rqueue->next errors + * Alan Cox : PSH doesn't end a TCP read. Switched a + * bit to skb ops. + * Alan Cox : Tidied tcp_data to avoid a potential + * nasty. + * Alan Cox : Added some better commenting, as the + * tcp is hard to follow + * Alan Cox : Removed incorrect check for 20 * psh + * Michael O'Reilly : ack < copied bug fix. + * Johannes Stille : Misc tcp fixes (not all in yet). + * Alan Cox : FIN with no memory -> CRASH + * Alan Cox : Added socket option proto entries. + * Also added awareness of them to accept. + * Alan Cox : Added TCP options (SOL_TCP) + * Alan Cox : Switched wakeup calls to callbacks, + * so the kernel can layer network + * sockets. + * Alan Cox : Use ip_tos/ip_ttl settings. + * Alan Cox : Handle FIN (more) properly (we hope). + * Alan Cox : RST frames sent on unsynchronised + * state ack error. + * Alan Cox : Put in missing check for SYN bit. + * Alan Cox : Added tcp_select_window() aka NET2E + * window non shrink trick. + * Alan Cox : Added a couple of small NET2E timer + * fixes + * Charles Hedrick : TCP fixes + * Toomas Tamm : TCP window fixes + * Alan Cox : Small URG fix to rlogin ^C ack fight + * Charles Hedrick : Rewrote most of it to actually work + * Linus : Rewrote tcp_read() and URG handling + * completely + * Gerhard Koerting: Fixed some missing timer handling + * Matthew Dillon : Reworked TCP machine states as per RFC + * Gerhard Koerting: PC/TCP workarounds + * Adam Caldwell : Assorted timer/timing errors + * Matthew Dillon : Fixed another RST bug + * Alan Cox : Move to kernel side addressing changes. + * Alan Cox : Beginning work on TCP fastpathing + * (not yet usable) + * Arnt Gulbrandsen: Turbocharged tcp_check() routine. + * Alan Cox : TCP fast path debugging + * Alan Cox : Window clamping + * Michael Riepe : Bug in tcp_check() + * Matt Dillon : More TCP improvements and RST bug fixes + * Matt Dillon : Yet more small nasties remove from the + * TCP code (Be very nice to this man if + * tcp finally works 100%) 8) + * Alan Cox : BSD accept semantics. + * Alan Cox : Reset on closedown bug. + * Peter De Schrijver : ENOTCONN check missing in tcp_sendto(). + * Michael Pall : Handle poll() after URG properly in + * all cases. + * Michael Pall : Undo the last fix in tcp_read_urg() + * (multi URG PUSH broke rlogin). + * Michael Pall : Fix the multi URG PUSH problem in + * tcp_readable(), poll() after URG + * works now. + * Michael Pall : recv(...,MSG_OOB) never blocks in the + * BSD api. + * Alan Cox : Changed the semantics of sk->socket to + * fix a race and a signal problem with + * accept() and async I/O. + * Alan Cox : Relaxed the rules on tcp_sendto(). + * Yury Shevchuk : Really fixed accept() blocking problem. + * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for + * clients/servers which listen in on + * fixed ports. + * Alan Cox : Cleaned the above up and shrank it to + * a sensible code size. + * Alan Cox : Self connect lockup fix. + * Alan Cox : No connect to multicast. + * Ross Biro : Close unaccepted children on master + * socket close. + * Alan Cox : Reset tracing code. + * Alan Cox : Spurious resets on shutdown. + * Alan Cox : Giant 15 minute/60 second timer error + * Alan Cox : Small whoops in polling before an + * accept. + * Alan Cox : Kept the state trace facility since + * it's handy for debugging. 
+ * Alan Cox : More reset handler fixes. + * Alan Cox : Started rewriting the code based on + * the RFC's for other useful protocol + * references see: Comer, KA9Q NOS, and + * for a reference on the difference + * between specifications and how BSD + * works see the 4.4lite source. + * A.N.Kuznetsov : Don't time wait on completion of tidy + * close. + * Linus Torvalds : Fin/Shutdown & copied_seq changes. + * Linus Torvalds : Fixed BSD port reuse to work first syn + * Alan Cox : Reimplemented timers as per the RFC + * and using multiple timers for sanity. + * Alan Cox : Small bug fixes, and a lot of new + * comments. + * Alan Cox : Fixed dual reader crash by locking + * the buffers (much like datagram.c) + * Alan Cox : Fixed stuck sockets in probe. A probe + * now gets fed up of retrying without + * (even a no space) answer. + * Alan Cox : Extracted closing code better + * Alan Cox : Fixed the closing state machine to + * resemble the RFC. + * Alan Cox : More 'per spec' fixes. + * Jorge Cwik : Even faster checksumming. + * Alan Cox : tcp_data() doesn't ack illegal PSH + * only frames. At least one pc tcp stack + * generates them. + * Alan Cox : Cache last socket. + * Alan Cox : Per route irtt. + * Matt Day : poll()->select() match BSD precisely on error + * Alan Cox : New buffers + * Marc Tamsky : Various sk->prot->retransmits and + * sk->retransmits misupdating fixed. + * Fixed tcp_write_timeout: stuck close, + * and TCP syn retries gets used now. + * Mark Yarvis : In tcp_read_wakeup(), don't send an + * ack if state is TCP_CLOSED. + * Alan Cox : Look up device on a retransmit - routes may + * change. Doesn't yet cope with MSS shrink right + * but its a start! + * Marc Tamsky : Closing in closing fixes. + * Mike Shaver : RFC1122 verifications. + * Alan Cox : rcv_saddr errors. + * Alan Cox : Block double connect(). + * Alan Cox : Small hooks for enSKIP. + * Alexey Kuznetsov: Path MTU discovery. + * Alan Cox : Support soft errors. + * Alan Cox : Fix MTU discovery pathological case + * when the remote claims no mtu! + * Marc Tamsky : TCP_CLOSE fix. + * Colin (G3TNE) : Send a reset on syn ack replies in + * window but wrong (fixes NT lpd problems) + * Pedro Roque : Better TCP window handling, delayed ack. + * Joerg Reuter : No modification of locked buffers in + * tcp_do_retransmit() + * Eric Schenk : Changed receiver side silly window + * avoidance algorithm to BSD style + * algorithm. This doubles throughput + * against machines running Solaris, + * and seems to result in general + * improvement. + * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD + * Willy Konynenberg : Transparent proxying support. + * Mike McLagan : Routing by source + * Keith Owens : Do proper merging with partial SKB's in + * tcp_do_sendmsg to avoid burstiness. + * Eric Schenk : Fix fast close down bug with + * shutdown() followed by close(). + * Andi Kleen : Make poll agree with SIGIO + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + * + * Description of States: + * + * TCP_SYN_SENT sent a connection request, waiting for ack + * + * TCP_SYN_RECV received a connection request, sent ack, + * waiting for final ack in three-way handshake. 
+ * + * TCP_ESTABLISHED connection established + * + * TCP_FIN_WAIT1 our side has shutdown, waiting to complete + * transmission of remaining buffered data + * + * TCP_FIN_WAIT2 all buffered data sent, waiting for remote + * to shutdown + * + * TCP_CLOSING both sides have shutdown but we still have + * data we have to finish sending + * + * TCP_TIME_WAIT timeout to catch resent junk before entering + * closed, can only be entered from FIN_WAIT2 + * or CLOSING. Required because the other end + * may not have gotten our last ACK causing it + * to retransmit the data packet (which we ignore) + * + * TCP_CLOSE_WAIT remote side has shutdown and is waiting for + * us to finish writing our data and to shutdown + * (we have to close() to move on to LAST_ACK) + * + * TCP_LAST_ACK out side has shutdown after remote has + * shutdown. There may still be data in our + * buffer that we have to finish sending + * + * TCP_CLOSE socket is finished + */ + +/* + * RFC1122 status: + * NOTE: I'm not going to be doing comments in the code for this one except + * for violations and the like. tcp.c is just too big... If I say something + * "does?" or "doesn't?", it means I'm not sure, and will have to hash it out + * with Alan. -- MS 950903 + * [Note: Most of the TCP code has been rewriten/redesigned since this + * RFC1122 check. It is probably not correct anymore. It should be redone + * before 2.2. -AK] + * + * Use of PSH (4.2.2.2) + * MAY aggregate data sent without the PSH flag. (does) + * MAY queue data received without the PSH flag. (does) + * SHOULD collapse successive PSH flags when it packetizes data. (doesn't) + * MAY implement PSH on send calls. (doesn't, thus:) + * MUST NOT buffer data indefinitely (doesn't [1 second]) + * MUST set PSH on last segment (does) + * MAY pass received PSH to application layer (doesn't) + * SHOULD send maximum-sized segment whenever possible. (almost always does) + * + * Window Size (4.2.2.3, 4.2.2.16) + * MUST treat window size as an unsigned number (does) + * SHOULD treat window size as a 32-bit number (does not) + * MUST NOT shrink window once it is offered (does not normally) + * + * Urgent Pointer (4.2.2.4) + * **MUST point urgent pointer to last byte of urgent data (not right + * after). (doesn't, to be like BSD. That's configurable, but defaults + * to off) + * MUST inform application layer asynchronously of incoming urgent + * data. (does) + * MUST provide application with means of determining the amount of + * urgent data pending. (does) + * **MUST support urgent data sequence of arbitrary length. (doesn't, but + * it's sort of tricky to fix, as urg_ptr is a 16-bit quantity) + * [Follows BSD 1 byte of urgent data] + * + * TCP Options (4.2.2.5) + * MUST be able to receive TCP options in any segment. (does) + * MUST ignore unsupported options (does) + * + * Maximum Segment Size Option (4.2.2.6) + * MUST implement both sending and receiving MSS. (does, but currently + * only uses the smaller of both of them) + * SHOULD send an MSS with every SYN where receive MSS != 536 (MAY send + * it always). (does, even when MSS == 536, which is legal) + * MUST assume MSS == 536 if no MSS received at connection setup (does) + * MUST calculate "effective send MSS" correctly: + * min(physical_MTU, remote_MSS+20) - sizeof(tcphdr) - sizeof(ipopts) + * (does - but allows operator override) + * + * TCP Checksum (4.2.2.7) + * MUST generate and check TCP checksum. (does) + * + * Initial Sequence Number Selection (4.2.2.8) + * MUST use the RFC 793 clock selection mechanism. 
(doesn't, but it's + * OK: RFC 793 specifies a 250KHz clock, while we use 1MHz, which is + * necessary for 10Mbps networks - and harder than BSD to spoof! + * With syncookies we don't) + * + * Simultaneous Open Attempts (4.2.2.10) + * MUST support simultaneous open attempts (does) + * + * Recovery from Old Duplicate SYN (4.2.2.11) + * MUST keep track of active vs. passive open (does) + * + * RST segment (4.2.2.12) + * SHOULD allow an RST segment to contain data (does, but doesn't do + * anything with it, which is standard) + * + * Closing a Connection (4.2.2.13) + * MUST inform application of whether connection was closed by RST or + * normal close. (does) + * MAY allow "half-duplex" close (treat connection as closed for the + * local app, even before handshake is done). (does) + * MUST linger in TIME_WAIT for 2 * MSL (does) + * + * Retransmission Timeout (4.2.2.15) + * MUST implement Jacobson's slow start and congestion avoidance + * stuff. (does) + * + * Probing Zero Windows (4.2.2.17) + * MUST support probing of zero windows. (does) + * MAY keep offered window closed indefinitely. (does) + * MUST allow remote window to stay closed indefinitely. (does) + * + * Passive Open Calls (4.2.2.18) + * MUST NOT let new passive open affect other connections. (doesn't) + * MUST support passive opens (LISTENs) concurrently. (does) + * + * Time to Live (4.2.2.19) + * MUST make TCP TTL configurable. (does - IP_TTL option) + * + * Event Processing (4.2.2.20) + * SHOULD queue out-of-order segments. (does) + * MUST aggregate ACK segments whenever possible. (does but badly) + * + * Retransmission Timeout Calculation (4.2.3.1) + * MUST implement Karn's algorithm and Jacobson's algorithm for RTO + * calculation. (does, or at least explains them in the comments 8*b) + * SHOULD initialize RTO to 0 and RTT to 3. (does) + * + * When to Send an ACK Segment (4.2.3.2) + * SHOULD implement delayed ACK. (does) + * MUST keep ACK delay < 0.5 sec. (does) + * + * When to Send a Window Update (4.2.3.3) + * MUST implement receiver-side SWS. (does) + * + * When to Send Data (4.2.3.4) + * MUST implement sender-side SWS. (does) + * SHOULD implement Nagle algorithm. (does) + * + * TCP Connection Failures (4.2.3.5) + * MUST handle excessive retransmissions "properly" (see the RFC). (does) + * SHOULD inform application layer of soft errors. (does) + * + * TCP Keep-Alives (4.2.3.6) + * MAY provide keep-alives. (does) + * MUST make keep-alives configurable on a per-connection basis. (does) + * MUST default to no keep-alives. (does) + * MUST make keep-alive interval configurable. (does) + * MUST make default keep-alive interval > 2 hours. (does) + * MUST NOT interpret failure to ACK keep-alive packet as dead + * connection. (doesn't) + * SHOULD send keep-alive with no data. (does) + * + * TCP Multihoming (4.2.3.7) + * MUST get source address from IP layer before sending first + * SYN. (does) + * MUST use same local address for all segments of a connection. (does) + * + * IP Options (4.2.3.8) + * MUST ignore unsupported IP options. (does) + * MAY support Time Stamp and Record Route. (does) + * MUST allow application to specify a source route. (does) + * MUST allow received Source Route option to set route for all future + * segments on this connection. (does not (security issues)) + * + * ICMP messages (4.2.3.9) + * MUST act on ICMP errors. (does) + * MUST slow transmission upon receipt of a Source Quench. 
(doesn't anymore + * because that is deprecated now by the IETF, can be turned on) + * MUST NOT abort connection upon receipt of soft Destination + * Unreachables (0, 1, 5), Time Exceededs and Parameter + * Problems. (doesn't) + * SHOULD report soft Destination Unreachables etc. to the + * application. (does, except during SYN_RECV and may drop messages + * in some rare cases before accept() - ICMP is unreliable) + * SHOULD abort connection upon receipt of hard Destination Unreachable + * messages (2, 3, 4). (does, but see above) + * + * Remote Address Validation (4.2.3.10) + * MUST reject as an error OPEN for invalid remote IP address. (does) + * MUST ignore SYN with invalid source address. (does) + * MUST silently discard incoming SYN for broadcast/multicast + * address. (does) + * + * Asynchronous Reports (4.2.4.1) + * MUST provide mechanism for reporting soft errors to application + * layer. (does) + * + * Type of Service (4.2.4.2) + * MUST allow application layer to set Type of Service. (does IP_TOS) + * + * (Whew. -- MS 950903) + * (Updated by AK, but not complete yet.) + **/ + +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/poll.h> +#include <linux/init.h> + +#include <net/icmp.h> +#include <net/tcp.h> + +#include <asm/uaccess.h> + +int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + +struct tcp_mib tcp_statistics; + +kmem_cache_t *tcp_openreq_cachep; +kmem_cache_t *tcp_bucket_cachep; +kmem_cache_t *tcp_timewait_cachep; + +/* + * Find someone to 'accept'. Must be called with + * the socket locked or with interrupts disabled + */ + +static struct open_request *tcp_find_established(struct tcp_opt *tp, + struct open_request **prevp) +{ + struct open_request *req = tp->syn_wait_queue; + struct open_request *prev = (struct open_request *)&tp->syn_wait_queue; + while(req) { + if (req->sk && + ((1 << req->sk->state) & + ~(TCPF_SYN_SENT|TCPF_SYN_RECV))) + break; + prev = req; + req = req->dl_next; + } + *prevp = prev; + return req; +} + +/* + * Walk down the receive queue counting readable data. + * + * Must be called with the socket lock held. + */ + +static int tcp_readable(struct sock *sk) +{ + unsigned long counted; + unsigned long amount; + struct sk_buff *skb; + int sum; + + SOCK_DEBUG(sk, "tcp_readable: %p - ",sk); + + skb = skb_peek(&sk->receive_queue); + if (skb == NULL) { + SOCK_DEBUG(sk, "empty\n"); + return(0); + } + + counted = sk->tp_pinfo.af_tcp.copied_seq; /* Where we are at the moment */ + amount = 0; + + /* Do until a push or until we are out of data. */ + do { + /* Found a hole so stops here. */ + if (before(counted, TCP_SKB_CB(skb)->seq)) /* should not happen */ + break; + + /* Length - header but start from where we are up to + * avoid overlaps. + */ + sum = skb->len - (counted - TCP_SKB_CB(skb)->seq); + if (sum >= 0) { + /* Add it up, move on. */ + amount += sum; + counted += sum; + if (skb->h.th->syn) + counted++; + } + + /* Don't count urg data ... but do it in the right place! + * Consider: "old_data (ptr is here) URG PUSH data" + * The old code would stop at the first push because + * it counted the urg (amount==1) and then does amount-- + * *after* the loop. This means tcp_readable() always + * returned zero if any URG PUSH was in the queue, even + * though there was normal data available. If we subtract + * the urg data right here, we even get it to work for more + * than one URG PUSH skb without normal data. + * This means that poll() finally works now with urg data + * in the queue. 
Note that rlogin was never affected + * because it doesn't use poll(); it uses two processes + * and a blocking read(). And the queue scan in tcp_read() + * was correct. Mike <pall@rz.uni-karlsruhe.de> + */ + + /* Don't count urg data. */ + if (skb->h.th->urg) + amount--; +#if 0 + if (amount && skb->h.th->psh) break; +#endif + skb = skb->next; + } while(skb != (struct sk_buff *)&sk->receive_queue); + + SOCK_DEBUG(sk, "got %lu bytes.\n",amount); + return(amount); +} + +/* + * LISTEN is a special case for poll.. + */ +static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) +{ + struct open_request *req, *dummy; + + lock_sock(sk); + req = tcp_find_established(&sk->tp_pinfo.af_tcp, &dummy); + release_sock(sk); + if (req) + return POLLIN | POLLRDNORM; + return 0; +} + +/* + * Compute minimal free write space needed to queue new packets. + */ +#define tcp_min_write_space(__sk) \ + (atomic_read(&(__sk)->wmem_alloc) / 2) + +/* + * Wait for a TCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) +{ + unsigned int mask; + struct sock *sk = sock->sk; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + poll_wait(file, sk->sleep, wait); + if (sk->state == TCP_LISTEN) + return tcp_listen_poll(sk, wait); + + mask = 0; + if (sk->err) + mask = POLLERR; + + /* + * POLLHUP is certainly not done right. But poll() doesn't + * have a notion of HUP in just one direction, and for a + * socket the read side is more interesting. + * + * Some poll() documentation says that POLLHUP is incompatible + * with the POLLOUT/POLLWR flags, so somebody should check this + * all. But careful, it tends to be safer to return too many + * bits than too few, and you can easily break real applications + * if you don't tell them that something has hung up! + * + * Check-me. + */ + if (sk->shutdown & RCV_SHUTDOWN) + mask |= POLLHUP; + + /* Connected? */ + if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + if ((tp->rcv_nxt != tp->copied_seq) && + (tp->urg_seq != tp->copied_seq || + tp->rcv_nxt != tp->copied_seq+1 || + sk->urginline || !tp->urg_data)) + mask |= POLLIN | POLLRDNORM; + + if (!(sk->shutdown & SEND_SHUTDOWN)) { + if (sock_wspace(sk) >= tcp_min_write_space(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ + sk->socket->flags |= SO_NOSPACE; + } + } + + if (tp->urg_data & URG_VALID) + mask |= POLLPRI; + } + return mask; +} + +/* + * Socket write_space callback. + * This (or rather the sock_wake_async) should agree with poll. 
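+ * Both sides use tcp_min_write_space(): the async wakeup (SIGIO) is only
+ * issued once sock_wspace() has reached half of the memory currently
+ * charged to the send queue, which is also the point at which tcp_poll()
+ * starts reporting POLLOUT | POLLWRNORM.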
+ */ +void tcp_write_space(struct sock *sk) +{ + if (sk->dead) + return; + + wake_up_interruptible(sk->sleep); + if (sock_wspace(sk) >= + tcp_min_write_space(sk)) + sock_wake_async(sk->socket, 2); +} + + +int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + int answ; + + switch(cmd) { + case TIOCINQ: +#ifdef FIXME /* FIXME: */ + case FIONREAD: +#endif + if (sk->state == TCP_LISTEN) + return(-EINVAL); + lock_sock(sk); + answ = tcp_readable(sk); + release_sock(sk); + break; + case SIOCATMARK: + { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + answ = tp->urg_data && tp->urg_seq == tp->copied_seq; + break; + } + case TIOCOUTQ: + if (sk->state == TCP_LISTEN) + return(-EINVAL); + answ = sock_wspace(sk); + break; + default: + return(-ENOIOCTLCMD); + }; + + return put_user(answ, (int *)arg); +} + +/* + * Wait for a socket to get into the connected state + * + * Note: must be called with the socket locked. + */ +static int wait_for_tcp_connect(struct sock * sk, int flags) +{ + struct task_struct *tsk = current; + struct wait_queue wait = { tsk, NULL }; + + while((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + if(sk->err) + return sock_error(sk); + if((1 << sk->state) & + ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + if(sk->keepopen && !(flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, tsk, 0); + return -EPIPE; + } + if(flags & MSG_DONTWAIT) + return -EAGAIN; + if(signal_pending(tsk)) + return -ERESTARTSYS; + + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue(sk->sleep, &wait); + release_sock(sk); + + if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) && + sk->err == 0) + schedule(); + + tsk->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + lock_sock(sk); + } + return 0; +} + +static inline int tcp_memory_free(struct sock *sk) +{ + return atomic_read(&sk->wmem_alloc) < sk->sndbuf; +} + +/* + * Wait for more memory for a socket + */ +static void wait_for_tcp_memory(struct sock * sk) +{ + release_sock(sk); + if (!tcp_memory_free(sk)) { + struct wait_queue wait = { current, NULL }; + + sk->socket->flags &= ~SO_NOSPACE; + add_wait_queue(sk->sleep, &wait); + for (;;) { + if (signal_pending(current)) + break; + current->state = TASK_INTERRUPTIBLE; + if (tcp_memory_free(sk)) + break; + if (sk->shutdown & SEND_SHUTDOWN) + break; + if (sk->err) + break; + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + } + lock_sock(sk); +} + +/* + * Wait for a buffer. + */ +static int wait_for_buffer(struct sock *sk) +{ + struct wait_queue wait = { current, NULL }; + + release_sock(sk); + add_wait_queue(sk->sleep, &wait); + current->state = TASK_INTERRUPTIBLE; + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + lock_sock(sk); + return 0; +} + +/* When all user supplied data has been queued set the PSH bit */ +#define PSH_NEEDED (seglen == 0 && iovlen == 0) + +/* + * This routine copies from a user buffer into a socket, + * and starts the transmit system. + * + * Note: must be called with the socket locked. + */ + +int tcp_do_sendmsg(struct sock *sk, struct msghdr *msg) +{ + struct iovec *iov; + struct tcp_opt *tp; + struct sk_buff *skb; + int iovlen, flags; + int mss_now; + int err, copied; + + lock_sock(sk); + + err = 0; + tp = &(sk->tp_pinfo.af_tcp); + + /* Wait for a connection to finish. 
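+	   The wait_for_tcp_connect() call below sleeps until the socket is
+	   ESTABLISHED or CLOSE_WAIT; MSG_DONTWAIT turns that into -EAGAIN,
+	   and a connection that dies first returns -EPIPE (with SIGPIPE if
+	   sk->keepopen is set and MSG_NOSIGNAL is not).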
*/ + flags = msg->msg_flags; + if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + if((err = wait_for_tcp_connect(sk, flags)) != 0) + goto out; + + /* This should be in poll */ + sk->socket->flags &= ~SO_NOSPACE; /* clear SIGIO XXX */ + + mss_now = tcp_current_mss(sk); + + /* Ok commence sending. */ + iovlen = msg->msg_iovlen; + iov = msg->msg_iov; + copied = 0; + + while(--iovlen >= 0) { + int seglen=iov->iov_len; + unsigned char * from=iov->iov_base; + + iov++; + + while(seglen > 0) { + int copy, tmp, queue_it, psh; + + if (err) + goto do_fault2; + + /* Stop on errors. */ + if (sk->err) + goto do_sock_err; + + /* Make sure that we are established. */ + if (sk->shutdown & SEND_SHUTDOWN) + goto do_shutdown; + + /* Now we need to check if we have a half + * built packet we can tack some data onto. + */ + if (tp->send_head && !(flags & MSG_OOB)) { + skb = sk->write_queue.prev; + copy = skb->len; + /* If the remote does SWS avoidance we should + * queue the best we can if not we should in + * fact send multiple packets... + * A method for detecting this would be most + * welcome. + */ + if (skb_tailroom(skb) > 0 && + (mss_now - copy) > 0 && + tp->snd_nxt < TCP_SKB_CB(skb)->end_seq) { + int last_byte_was_odd = (copy % 4); + + /* + * Check for parallel writers sleeping in user access. + */ + if (tp->partial_writers++ > 0) { + wait_for_buffer(sk); + tp->partial_writers--; + continue; + } + + copy = mss_now - copy; + if(copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + if(copy > seglen) + copy = seglen; + + if(last_byte_was_odd) { + if(copy_from_user(skb_put(skb, copy), + from, copy)) + err = -EFAULT; + skb->csum = csum_partial(skb->data, + skb->len, 0); + } else { + skb->csum = + csum_and_copy_from_user( + from, skb_put(skb, copy), + copy, skb->csum, &err); + } + + /* + * FIXME: the *_user functions should + * return how much data was + * copied before the fault + * occurred and then a partial + * packet with this data should + * be sent. Unfortunately + * csum_and_copy_from_user doesn't + * return this information. + * ATM it might send partly zeroed + * data in this case. + */ + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + from += copy; + copied += copy; + seglen -= copy; + if (PSH_NEEDED) + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + + if (--tp->partial_writers > 0) + wake_up_interruptible(sk->sleep); + + continue; + } + } + + /* We also need to worry about the window. If + * window < 1/2 the maximum window we've seen + * from this host, don't use it. This is + * sender side silly window prevention, as + * specified in RFC1122. (Note that this is + * different than earlier versions of SWS + * prevention, e.g. RFC813.). What we + * actually do is use the whole MSS. Since + * the results in the right edge of the packet + * being outside the window, it will be queued + * for later rather than sent. + */ + psh = 0; + copy = tp->snd_wnd - (tp->snd_nxt - tp->snd_una); + if(copy > (tp->max_window >> 1)) { + copy = min(copy, mss_now); + psh = 1; + } else { + copy = mss_now; + } + if(copy > seglen) + copy = seglen; + + /* Determine how large of a buffer to allocate. */ + tmp = MAX_HEADER + sk->prot->max_header; + if (copy < min(mss_now, tp->max_window >> 1) && + !(flags & MSG_OOB)) { + tmp += min(mss_now, tp->max_window); + + /* What is happening here is that we want to + * tack on later members of the users iovec + * if possible into a single frame. When we + * leave this loop our caller checks to see if + * we can send queued frames onto the wire. 
+ * See tcp_v[46]_sendmsg() for this. + */ + queue_it = 1; + } else { + tmp += copy; + queue_it = 0; + } + skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL); + + /* If we didn't get any memory, we need to sleep. */ + if (skb == NULL) { + sk->socket->flags |= SO_NOSPACE; + if (flags&MSG_DONTWAIT) { + err = -EAGAIN; + goto do_interrupted; + } + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto do_interrupted; + } + tcp_push_pending_frames(sk, tp); + wait_for_tcp_memory(sk); + + /* If SACK's were formed or PMTU events happened, + * we must find out about it. + */ + mss_now = tcp_current_mss(sk); + continue; + } + + seglen -= copy; + + /* Prepare control bits for TCP header creation engine. */ + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | + ((PSH_NEEDED || psh) ? + TCPCB_FLAG_PSH : 0)); + TCP_SKB_CB(skb)->sacked = 0; + if (flags & MSG_OOB) { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_URG; + TCP_SKB_CB(skb)->urg_ptr = copy; + } else + TCP_SKB_CB(skb)->urg_ptr = 0; + + /* TCP data bytes are SKB_PUT() on top, later + * TCP+IP+DEV headers are SKB_PUSH()'d beneath. + * Reserve header space and checksum the data. + */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + skb->csum = csum_and_copy_from_user(from, + skb_put(skb, copy), copy, 0, &err); + + if (err) + goto do_fault; + + from += copy; + copied += copy; + + TCP_SKB_CB(skb)->seq = tp->write_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + copy; + + /* This advances tp->write_seq for us. */ + tcp_send_skb(sk, skb, queue_it); + } + } + sk->err = 0; + err = copied; + goto out; + +do_sock_err: + if(copied) + err = copied; + else + err = sock_error(sk); + goto out; +do_shutdown: + if(copied) + err = copied; + else { + if (!(flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + err = -EPIPE; + } + goto out; +do_interrupted: + if(copied) + err = copied; + goto out; +do_fault: + kfree_skb(skb); +do_fault2: + err = -EFAULT; +out: + tcp_push_pending_frames(sk, tp); + release_sock(sk); + return err; +} + +#undef PSH_NEEDED + +/* + * Send an ack if one is backlogged at this point. Ought to merge + * this with tcp_send_ack(). + * This is called for delayed acks also. + */ + +void tcp_read_wakeup(struct sock *sk) +{ + /* If we're closed, don't send an ack, or we'll get a RST + * from the closed destination. + */ + if (sk->state != TCP_CLOSE) + tcp_send_ack(sk); +} + +/* + * Handle reading urgent data. BSD has very simple semantics for + * this, no blocking and very strange errors 8) + */ + +static int tcp_recv_urg(struct sock * sk, int nonblock, + struct msghdr *msg, int len, int flags, + int *addr_len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* No URG data to read. */ + if (sk->urginline || !tp->urg_data || tp->urg_data == URG_READ) + return -EINVAL; /* Yes this is right ! */ + + if (sk->err) + return sock_error(sk); + + if (sk->done) + return -ENOTCONN; + + if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) { + sk->done = 1; + return 0; + } + + lock_sock(sk); + if (tp->urg_data & URG_VALID) { + int err = 0; + char c = tp->urg_data; + + if (!(flags & MSG_PEEK)) + tp->urg_data = URG_READ; + + if(msg->msg_name) + tp->af_specific->addr2sockaddr(sk, (struct sockaddr *) + msg->msg_name); + + if(addr_len) + *addr_len = tp->af_specific->sockaddr_len; + + /* Read urgent data. */ + msg->msg_flags|=MSG_OOB; + release_sock(sk); + + if(len>0) + { + err = memcpy_toiovec(msg->msg_iov, &c, 1); + /* N.B. already set above ... */ + msg->msg_flags|=MSG_OOB; + } + else + msg->msg_flags|=MSG_TRUNC; + + /* N.B. Is this right?? 
If len == 0 we didn't read any data */ + return err ? -EFAULT : 1; + } + release_sock(sk); + + /* Fixed the recv(..., MSG_OOB) behaviour. BSD docs and + * the available implementations agree in this case: + * this call should never block, independent of the + * blocking state of the socket. + * Mike <pall@rz.uni-karlsruhe.de> + */ + return -EAGAIN; +} + +/* + * Release a skb if it is no longer needed. This routine + * must be called with interrupts disabled or with the + * socket locked so that the sk_buff queue operation is ok. + */ + +static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb) +{ + __skb_unlink(skb, &sk->receive_queue); + kfree_skb(skb); +} + +/* Clean up the receive buffer for full frames taken by the user, + * then send an ACK if necessary. COPIED is the number of bytes + * tcp_recvmsg has given to the user so far, it speeds up the + * calculation of whether or not we must ACK for the sake of + * a window update. + */ +static void cleanup_rbuf(struct sock *sk, int copied) +{ + struct sk_buff *skb; + + /* NOTE! The socket must be locked, so that we don't get + * a messed-up receive queue. + */ + while ((skb=skb_peek(&sk->receive_queue)) != NULL) { + if (!skb->used || atomic_read(&skb->users) > 1) + break; + tcp_eat_skb(sk, skb); + } + + /* We send an ACK if we can now advertise a non-zero window + * which has been raised "significantly". + */ + if(copied > 0) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + __u32 rcv_window_now = tcp_receive_window(tp); + __u32 new_window = __tcp_select_window(sk); + + /* We won't be raising the window any further than + * the window-clamp allows. Our window selection + * also keeps things a nice multiple of MSS. These + * checks are necessary to prevent spurious ACKs + * which don't advertize a larger window. + */ + if((new_window && (new_window >= rcv_window_now * 2)) && + ((rcv_window_now + tp->mss_cache) <= tp->window_clamp)) + tcp_read_wakeup(sk); + } +} + + +/* + * This routine copies from a sock struct into the user buffer. + */ + +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, + int len, int nonblock, int flags, int *addr_len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct wait_queue wait = { current, NULL }; + int copied = 0; + u32 peek_seq; + volatile u32 *seq; /* So gcc doesn't overoptimise */ + unsigned long used; + int err = 0; + int target = 1; /* Read at least this many bytes */ + + if (sk->err) + return sock_error(sk); + + if (sk->state == TCP_LISTEN) + return -ENOTCONN; + + /* Urgent data needs to be handled specially. */ + if (flags & MSG_OOB) + return tcp_recv_urg(sk, nonblock, msg, len, flags, addr_len); + + /* Copying sequence to update. This is volatile to handle + * the multi-reader case neatly (memcpy_to/fromfs might be + * inline and thus not flush cached variables otherwise). + */ + peek_seq = tp->copied_seq; + seq = &tp->copied_seq; + if (flags & MSG_PEEK) + seq = &peek_seq; + + /* Handle the POSIX bogosity MSG_WAITALL. */ + if (flags & MSG_WAITALL) + target=len; + + add_wait_queue(sk->sleep, &wait); + lock_sock(sk); + + /* + * BUG BUG BUG + * This violates 1003.1g compliance. We must wait for + * data to exist even if we read none! + */ + + while (len > 0) { + struct sk_buff * skb; + u32 offset; + + /* Are we at urgent data? Stop if we have read anything. */ + if (copied && tp->urg_data && tp->urg_seq == *seq) + break; + + /* We need to check signals first, to get correct SIGURG + * handling. 
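cleanup_rbuf() above only sends a window-update ACK when the window it could now advertise has at least doubled and the currently advertised window plus one MSS still fits under the clamp; otherwise it stays quiet and lets a later segment carry the update. A small self-contained restatement of that test, written as plain userland C rather than kernel code:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: the "send a window-update ACK?" test from
 * cleanup_rbuf(), pulled out as a pure predicate. rcv_window_now is the
 * window currently advertised, new_window what we could advertise after
 * the reader drained data; mss and window_clamp are as in tcp_opt.
 */
static int should_ack_window_update(uint32_t rcv_window_now,
                                    uint32_t new_window,
                                    uint32_t mss, uint32_t window_clamp)
{
	/* Only ACK when the advertised window would at least double and we
	 * are not already pressed up against the clamp.
	 */
	return new_window != 0 &&
	       new_window >= rcv_window_now * 2 &&
	       rcv_window_now + mss <= window_clamp;
}

int main(void)
{
	printf("%d\n", should_ack_window_update(2000, 8000, 1460, 65535));	/* 1: worth an ACK */
	printf("%d\n", should_ack_window_update(6000, 8000, 1460, 65535));	/* 0: not doubled yet */
	return 0;
}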
FIXME: Need to check this doesnt impact 1003.1g + * and move it down to the bottom of the loop + */ + if (signal_pending(current)) { + if (copied) + break; + copied = -ERESTARTSYS; + if (nonblock) + copied = -EAGAIN; + break; + } + + /* Next get a buffer. */ + current->state = TASK_INTERRUPTIBLE; + + skb = skb_peek(&sk->receive_queue); + do { + if (!skb) + break; + + /* Now that we have two receive queues this + * shouldn't happen. + */ + if (before(*seq, TCP_SKB_CB(skb)->seq)) { + printk(KERN_INFO "recvmsg bug: copied %X seq %X\n", + *seq, TCP_SKB_CB(skb)->seq); + break; + } + offset = *seq - TCP_SKB_CB(skb)->seq; + if (skb->h.th->syn) + offset--; + if (offset < skb->len) + goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; + if (!(flags & MSG_PEEK)) + skb->used = 1; + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->receive_queue); + + if (copied >= target) + break; + + /* + These three lines and clause if (sk->state == TCP_CLOSE) + are unlikely to be correct, if target > 1. + I DO NOT FIX IT, because I have no idea, what + POSIX prescribes to make here. Probably, it really + wants to lose data 8), if not all target is received. + --ANK + */ + if (sk->err && !(flags&MSG_PEEK)) { + copied = sock_error(sk); + break; + } + + if (sk->shutdown & RCV_SHUTDOWN) { + sk->done = 1; + break; + } + + if (sk->state == TCP_CLOSE) { + if (!sk->done) { + sk->done = 1; + break; + } + copied = -ENOTCONN; + break; + } + + if (nonblock) { + copied = -EAGAIN; + break; + } + + cleanup_rbuf(sk, copied); + release_sock(sk); + sk->socket->flags |= SO_WAITDATA; + schedule(); + sk->socket->flags &= ~SO_WAITDATA; + lock_sock(sk); + continue; + + found_ok_skb: + /* Lock the buffer. We can be fairly relaxed as + * an interrupt will never steal a buffer we are + * using unless I've missed something serious in + * tcp_data. + */ + atomic_inc(&skb->users); + + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + /* Do we have urgent data here? */ + if (tp->urg_data) { + u32 urg_offset = tp->urg_seq - *seq; + if (urg_offset < used) { + if (!urg_offset) { + if (!sk->urginline) { + ++*seq; + offset++; + used--; + } + } else + used = urg_offset; + } + } + + /* Copy it - We _MUST_ update *seq first so that we + * don't ever double read when we have dual readers + */ + *seq += used; + + /* This memcpy_toiovec can sleep. If it sleeps and we + * do a second read it relies on the skb->users to avoid + * a crash when cleanup_rbuf() gets called. + */ + err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used); + if (err) { + /* Exception. Bailout! */ + atomic_dec(&skb->users); + copied = -EFAULT; + break; + } + + copied += used; + len -= used; + + /* We now will not sleep again until we are finished + * with skb. Sorry if you are doing the SMP port + * but you'll just have to fix it neatly ;) + */ + atomic_dec(&skb->users); + + if (after(tp->copied_seq,tp->urg_seq)) + tp->urg_data = 0; + if (used + offset < skb->len) + continue; + + /* Process the FIN. We may also need to handle PSH + * here and make it break out of MSG_WAITALL. + */ + if (skb->h.th->fin) + goto found_fin_ok; + if (flags & MSG_PEEK) + continue; + skb->used = 1; + if (atomic_read(&skb->users) == 1) + tcp_eat_skb(sk, skb); + continue; + + found_fin_ok: + ++*seq; + if (flags & MSG_PEEK) + break; + + /* All is done. 
*/ + skb->used = 1; + sk->shutdown |= RCV_SHUTDOWN; + break; + } + + if(copied >= 0 && msg->msg_name) { + tp->af_specific->addr2sockaddr(sk, (struct sockaddr *) + msg->msg_name); + if(addr_len) + *addr_len = tp->af_specific->sockaddr_len; + } + + remove_wait_queue(sk->sleep, &wait); + current->state = TASK_RUNNING; + + /* Clean up data we have read: This will do ACK frames. */ + cleanup_rbuf(sk, copied); + release_sock(sk); + return copied; +} + +/* + * Check whether to renew the timer. + */ +static inline void tcp_check_fin_timer(struct sock *sk) +{ + if (sk->state == TCP_FIN_WAIT2 && !sk->timer.prev) + tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout); +} + +/* + * State processing on a close. This implements the state shift for + * sending our FIN frame. Note that we only send a FIN for some + * states. A shutdown() may have already sent the FIN, or we may be + * closed. + */ + +static unsigned char new_state[16] = { + /* current state: new state: action: */ + /* (Invalid) */ TCP_CLOSE, + /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_SYN_SENT */ TCP_CLOSE, + /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, + /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, + /* TCP_TIME_WAIT */ TCP_CLOSE, + /* TCP_CLOSE */ TCP_CLOSE, + /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, + /* TCP_LAST_ACK */ TCP_LAST_ACK, + /* TCP_LISTEN */ TCP_CLOSE, + /* TCP_CLOSING */ TCP_CLOSING, +}; + +static int tcp_close_state(struct sock *sk, int dead) +{ + int next = (int) new_state[sk->state]; + int ns = (next & TCP_STATE_MASK); + + tcp_set_state(sk, ns); + + /* This is a (useful) BSD violating of the RFC. There is a + * problem with TCP as specified in that the other end could + * keep a socket open forever with no application left this end. + * We use a 3 minute timeout (about the same as BSD) then kill + * our end. If they send after that then tough - BUT: long enough + * that we won't make the old 4*rto = almost no time - whoops + * reset mistake. + */ + if (dead) + tcp_check_fin_timer(sk); + + return (next & TCP_ACTION_FIN); +} + +/* + * Shutdown the sending side of a connection. Much like close except + * that we don't receive shut down or set sk->dead. + */ + +void tcp_shutdown(struct sock *sk, int how) +{ + /* We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. + * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. + */ + if (!(how & SEND_SHUTDOWN)) + return; + + /* If we've already sent a FIN, or it's a closed state, skip this. */ + if ((1 << sk->state) & + (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) { + lock_sock(sk); + + /* Clear out any half completed packets. FIN if needed. */ + if (tcp_close_state(sk,0)) + tcp_send_fin(sk); + + release_sock(sk); + } +} + + +/* + * Return 1 if we still have things to send in our buffers. + */ + +static inline int closing(struct sock * sk) +{ + return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK)); +} + +/* + * This routine closes sockets which have been at least partially + * opened, but not yet accepted. Currently it is only called by + * tcp_close, and timeout mirrors the value there. 
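The new_state[] table above packs the next TCP state and a "send a FIN now" action flag into one byte per current state, which is what lets tcp_close_state() be a table lookup plus a mask. Below is a toy standalone illustration of the same packing; the enum values, names and mask constants are invented for the sketch and only the scheme mirrors the kernel.

#include <stdio.h>

/* Illustrative only: a table-driven close transition in the style of
 * new_state[]/tcp_close_state(). All identifiers here are toy names.
 */
enum toy_state { T_ESTABLISHED = 1, T_FIN_WAIT1, T_CLOSE_WAIT, T_LAST_ACK, T_CLOSE };

#define T_STATE_MASK	0x0f
#define T_ACTION_FIN	0x10

static const unsigned char toy_new_state[] = {
	[T_ESTABLISHED] = T_FIN_WAIT1 | T_ACTION_FIN,	/* closing a live connection sends a FIN */
	[T_CLOSE_WAIT]  = T_LAST_ACK  | T_ACTION_FIN,	/* peer already FINned; we FIN back */
	[T_FIN_WAIT1]   = T_FIN_WAIT1,			/* FIN already queued, nothing more to do */
	[T_LAST_ACK]    = T_LAST_ACK,
	[T_CLOSE]       = T_CLOSE,
};

/* Returns nonzero when the caller should emit a FIN, like tcp_close_state(). */
static int toy_close_state(enum toy_state *state)
{
	unsigned char next = toy_new_state[*state];

	*state = (enum toy_state)(next & T_STATE_MASK);
	return next & T_ACTION_FIN;
}

int main(void)
{
	enum toy_state s = T_ESTABLISHED;
	int send_fin = toy_close_state(&s);

	printf("send_fin=%d new_state=%d\n", send_fin != 0, (int)s);	/* 1, T_FIN_WAIT1 */
	return 0;
}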
+ */ + +static void tcp_close_pending (struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct open_request *req = tp->syn_wait_queue; + + while(req) { + struct open_request *iter; + + if (req->sk) + tcp_close(req->sk, 0); + + iter = req; + req = req->dl_next; + + (*iter->class->destructor)(iter); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + sk->ack_backlog--; + tcp_openreq_free(iter); + } + + tcp_synq_init(tp); +} + +void tcp_close(struct sock *sk, long timeout) +{ + struct sk_buff *skb; + int data_was_unread = 0; + + /* + * Check whether the socket is locked ... supposedly + * it's impossible to tcp_close() a locked socket. + */ + if (atomic_read(&sk->sock_readers)) + printk("tcp_close: socket already locked!\n"); + + /* We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. + */ + lock_sock(sk); + if(sk->state == TCP_LISTEN) { + /* Special case. */ + tcp_set_state(sk, TCP_CLOSE); + tcp_close_pending(sk); + release_sock(sk); + sk->dead = 1; + return; + } + + /* It is questionable, what the role of this is now. + * In any event either it should be removed, or + * increment of SLT_KEEPALIVE be done, this is causing + * big problems. For now I comment it out. -DaveM + */ + /* sk->keepopen = 1; */ + sk->shutdown = SHUTDOWN_MASK; + + if (!sk->dead) + sk->state_change(sk); + + /* We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! + */ + while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) { + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin; + data_was_unread += len; + kfree_skb(skb); + } + + /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section + * 3.10, we send a RST here because data was lost. To + * witness the awful effects of the old behavior of always + * doing a FIN, run an older 2.1.x kernel or 2.0.x, start + * a bulk GET in an FTP client, suspend the process, wait + * for the client to advertise a zero window, then kill -9 + * the FTP client, wheee... Note: timeout is always zero + * in such a case. + */ + if(data_was_unread != 0) { + /* Unread data was tossed, zap the connection. */ + tcp_set_state(sk, TCP_CLOSE); + tcp_send_active_reset(sk); + } else if (tcp_close_state(sk,1)) { + /* We FIN if the application ate all the data before + * zapping the connection. + */ + tcp_send_fin(sk); + } + + if (timeout) { + struct task_struct *tsk = current; + struct wait_queue wait = { tsk, NULL }; + + add_wait_queue(sk->sleep, &wait); + release_sock(sk); + + while (1) { + tsk->state = TASK_INTERRUPTIBLE; + if (!closing(sk)) + break; + timeout = schedule_timeout(timeout); + if (signal_pending(tsk) || !timeout) + break; + } + + tsk->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + + lock_sock(sk); + } + + /* Now that the socket is dead, if we are in the FIN_WAIT2 state + * we may need to set up a timer. + */ + tcp_check_fin_timer(sk); + + release_sock(sk); + sk->dead = 1; +} + +/* + * Wait for an incoming connection, avoid race + * conditions. This must be called with the socket locked. 
+ */ +static struct open_request * wait_for_connect(struct sock * sk, + struct open_request **pprev) +{ + struct wait_queue wait = { current, NULL }; + struct open_request *req; + + add_wait_queue(sk->sleep, &wait); + for (;;) { + current->state = TASK_INTERRUPTIBLE; + release_sock(sk); + schedule(); + lock_sock(sk); + req = tcp_find_established(&(sk->tp_pinfo.af_tcp), pprev); + if (req) + break; + if (signal_pending(current)) + break; + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + return req; +} + +/* + * This will accept the next outstanding connection. + * + * Be careful about race conditions here - this is subtle. + */ + +struct sock *tcp_accept(struct sock *sk, int flags) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req, *prev; + struct sock *newsk = NULL; + int error; + + lock_sock(sk); + + /* We need to make sure that this socket is listening, + * and that it has something pending. + */ + error = EINVAL; + if (sk->state != TCP_LISTEN) + goto out; + + /* Find already established connection */ + req = tcp_find_established(tp, &prev); + if (!req) { + /* If this is a non blocking socket don't sleep */ + error = EAGAIN; + if (flags & O_NONBLOCK) + goto out; + + error = ERESTARTSYS; + req = wait_for_connect(sk, &prev); + if (!req) + goto out; + } + + tcp_synq_unlink(tp, req, prev); + newsk = req->sk; + req->class->destructor(req); + tcp_openreq_free(req); + sk->ack_backlog--; + if(sk->keepopen) + tcp_inc_slow_timer(TCP_SLT_KEEPALIVE); + + release_sock(sk); + return newsk; + +out: + /* sk should be in LISTEN state, thus accept can use sk->err for + * internal purposes without stomping one anyone's feed. + */ + sk->err = error; + release_sock(sk); + return newsk; +} + +/* + * Socket option code for TCP. + */ + +int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, + int optlen) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int val; + + if (level != SOL_TCP) + return tp->af_specific->setsockopt(sk, level, optname, + optval, optlen); + + if(optlen<sizeof(int)) + return -EINVAL; + + if (get_user(val, (int *)optval)) + return -EFAULT; + + switch(optname) { + case TCP_MAXSEG: + /* values greater than interface MTU won't take effect. however at + * the point when this call is done we typically don't yet know + * which interface is going to be used + */ + if(val < 1 || val > MAX_WINDOW) + return -EINVAL; + tp->user_mss = val; + return 0; + + case TCP_NODELAY: + /* You cannot try to use this and TCP_CORK in + * tandem, so let the user know. + */ + if (sk->nonagle == 2) + return -EINVAL; + sk->nonagle = (val == 0) ? 0 : 1; + return 0; + + case TCP_CORK: + /* When set indicates to always queue non-full frames. + * Later the user clears this option and we transmit + * any pending partial frames in the queue. This is + * meant to be used alongside sendfile() to get properly + * filled frames when the user (for example) must write + * out headers with a write() call first and then use + * sendfile to send out the data parts. + * + * You cannot try to use TCP_NODELAY and this mechanism + * at the same time, so let the user know. 
+ */ + if (sk->nonagle == 1) + return -EINVAL; + if (val != 0) { + sk->nonagle = 2; + } else { + sk->nonagle = 0; + + lock_sock(sk); + tcp_push_pending_frames(sk, tp); + release_sock(sk); + } + return 0; + + default: + return -ENOPROTOOPT; + }; +} + +int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, + int *optlen) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int val, len; + + if(level != SOL_TCP) + return tp->af_specific->getsockopt(sk, level, optname, + optval, optlen); + + if(get_user(len,optlen)) + return -EFAULT; + + len = min(len, sizeof(int)); + + switch(optname) { + case TCP_MAXSEG: + val = tp->user_mss; + break; + case TCP_NODELAY: + val = (sk->nonagle == 1); + break; + case TCP_CORK: + val = (sk->nonagle == 2); + break; + default: + return -ENOPROTOOPT; + }; + + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval, &val,len)) + return -EFAULT; + return 0; +} + +void tcp_set_keepalive(struct sock *sk, int val) +{ + if (!sk->keepopen && val) + tcp_inc_slow_timer(TCP_SLT_KEEPALIVE); + else if (sk->keepopen && !val) + tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); +} + +extern void __skb_cb_too_small_for_tcp(int, int); + +void __init tcp_init(void) +{ + struct sk_buff *skb = NULL; + + if(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) + __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), + sizeof(skb->cb)); + + tcp_openreq_cachep = kmem_cache_create("tcp_open_request", + sizeof(struct open_request), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_openreq_cachep) + panic("tcp_init: Cannot alloc open_request cache."); + + tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", + sizeof(struct tcp_bind_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_bucket_cachep) + panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + + tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", + sizeof(struct tcp_tw_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if(!tcp_timewait_cachep) + panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); +} diff --git a/pfinet/linux-src/net/ipv4/tcp_input.c b/pfinet/linux-src/net/ipv4/tcp_input.c new file mode 100644 index 00000000..a753b128 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/tcp_input.c @@ -0,0 +1,2432 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_input.c,v 1.164.2.7 1999/08/13 16:14:27 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche, <flla@stud.uni-sb.de> + * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> + * Linus Torvalds, <torvalds@cs.helsinki.fi> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Matthew Dillon, <dillon@apollo.west.oic.com> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Jorge Cwik, <jorge@laser.satlink.net> + */ + +/* + * Changes: + * Pedro Roque : Fast Retransmit/Recovery. + * Two receive queues. + * Retransmit queue handled by TCP. + * Better retransmit timer handling. + * New congestion avoidance. + * Header prediction. + * Variable renaming. + * + * Eric : Fast Retransmit. + * Randy Scott : MSS option defines. + * Eric Schenk : Fixes to slow start algorithm. + * Eric Schenk : Yet another double ACK bug. + * Eric Schenk : Delayed ACK bug fixes. 
+ * Eric Schenk : Floyd style fast retrans war avoidance. + * David S. Miller : Don't allow zero congestion window. + * Eric Schenk : Fix retransmitter so that it sends + * next packet on ack of previous packet. + * Andi Kleen : Moved open_request checking here + * and process RSTs for open_requests. + * Andi Kleen : Better prune_queue, and other fixes. + * Andrey Savochkin: Fix RTT measurements in the presnce of + * timestamps. + * Andrey Savochkin: Check sequence numbers correctly when + * removing SACKs due to in sequence incoming + * data segments. + * Andi Kleen: Make sure we never ack data there is not + * enough room for. Also make this condition + * a fatal error if it might still happen. + * Andi Kleen: Add tcp_measure_rcv_mss to make + * connections with MSS<min(MTU,ann. MSS) + * work without delayed acks. + * Andi Kleen: Process packets with PSH set in the + * fast path. + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <net/tcp.h> +#include <linux/ipsec.h> + +#ifdef CONFIG_SYSCTL +#define SYNC_INIT 0 /* let the user enable it */ +#else +#define SYNC_INIT 1 +#endif + +extern int sysctl_tcp_fin_timeout; + +/* These are on by default so the code paths get tested. + * For the final 2.2 this may be undone at our discretion. -DaveM + */ +int sysctl_tcp_timestamps = 1; +int sysctl_tcp_window_scaling = 1; +int sysctl_tcp_sack = 1; + +int sysctl_tcp_syncookies = SYNC_INIT; +int sysctl_tcp_stdurg; +int sysctl_tcp_rfc1337; + +static int prune_queue(struct sock *sk); + +/* There is something which you must keep in mind when you analyze the + * behavior of the tp->ato delayed ack timeout interval. When a + * connection starts up, we want to ack as quickly as possible. The + * problem is that "good" TCP's do slow start at the beginning of data + * transmission. The means that until we send the first few ACK's the + * sender will sit on his end and only queue most of his data, because + * he can only send snd_cwnd unacked packets at any given time. For + * each ACK we send, he increments snd_cwnd and transmits more of his + * queue. -DaveM + */ +static void tcp_delack_estimator(struct tcp_opt *tp) +{ + if(tp->ato == 0) { + tp->lrcvtime = tcp_time_stamp; + + /* Help sender leave slow start quickly, + * and also makes sure we do not take this + * branch ever again for this connection. + */ + tp->ato = 1; + tcp_enter_quickack_mode(tp); + } else { + int m = tcp_time_stamp - tp->lrcvtime; + + tp->lrcvtime = tcp_time_stamp; + if(m <= 0) + m = 1; + if(m > tp->rto) + tp->ato = tp->rto; + else { + /* This funny shift makes sure we + * clear the "quick ack mode" bit. + */ + tp->ato = ((tp->ato << 1) >> 2) + m; + } + } +} + +/* + * Remember to send an ACK later. + */ +static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th, + struct sk_buff *skb) +{ + tp->delayed_acks++; + + /* Tiny-grams with PSH set artifically deflate our + * ato measurement, but with a lower bound. + */ + if(th->psh && (skb->len < (tp->mss_cache >> 1))) { + /* Preserve the quickack state. */ + if((tp->ato & 0x7fffffff) > HZ/50) + tp->ato = ((tp->ato & 0x80000000) | + (HZ/50)); + } +} + +/* Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. 
+ * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ + +static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) +{ + long m = mrtt; /* RTT */ + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + */ + if(m == 0) + m = 1; + if (tp->srtt != 0) { + m -= (tp->srtt >> 3); /* m is now error in rtt est */ + tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) + m = -m; /* m is now abs(error) */ + m -= (tp->mdev >> 2); /* similar update on mdev */ + tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ + } else { + /* no previous measure. */ + tp->srtt = m<<3; /* take the measured time to be rtt */ + tp->mdev = m<<2; /* make sure rto = 3*rtt */ + } +} + +/* Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ + +static __inline__ void tcp_set_rto(struct tcp_opt *tp) +{ + tp->rto = (tp->srtt >> 3) + tp->mdev; + tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1)); +} + + +/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound + * on packet lifetime in the internet. We need the HZ/5 lower + * bound to behave correctly against BSD stacks with a fixed + * delayed ack. + * FIXME: It's not entirely clear this lower bound is the best + * way to avoid the problem. Is it possible to drop the lower + * bound and still avoid trouble with BSD stacks? Perhaps + * some modification to the RTO calculation that takes delayed + * ack bias into account? This needs serious thought. -- erics + */ +static __inline__ void tcp_bound_rto(struct tcp_opt *tp) +{ + if (tp->rto > 120*HZ) + tp->rto = 120*HZ; + if (tp->rto < HZ/5) + tp->rto = HZ/5; +} + +/* WARNING: this must not be called if tp->saw_timestamp was false. */ +extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp, + __u32 start_seq, __u32 end_seq) +{ + /* It is start_seq <= last_ack_seq combined + with in window check. If start_seq<=last_ack_seq<=rcv_nxt, + then segment is in window if end_seq>=rcv_nxt. + */ + if (!after(start_seq, tp->last_ack_sent) && + !before(end_seq, tp->rcv_nxt)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Plus: expired timestamps. + * + * Plus: resets failing PAWS. + */ + if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) { + tp->ts_recent = tp->rcv_tsval; + tp->ts_recent_stamp = tcp_time_stamp; + } + } +} + +#define PAWS_24DAYS (HZ * 60 * 60 * 24 * 24) + +extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len) +{ + return ((s32)(tp->rcv_tsval - tp->ts_recent) < 0 && + (s32)(tcp_time_stamp - tp->ts_recent_stamp) < PAWS_24DAYS && + /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM */ + len != (th->doff * 4)); +} + + +static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq) +{ + u32 end_window = tp->rcv_wup + tp->rcv_wnd; + + if (tp->rcv_wnd && + after(end_seq, tp->rcv_nxt) && + before(seq, end_window)) + return 1; + if (seq != end_window) + return 0; + return (seq == end_seq); +} + +/* This functions checks to see if the tcp header is actually acceptable. 
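tcp_rtt_estimator() and tcp_set_rto() above keep srtt scaled by 8 and mdev scaled by 4 so that the Jacobson SIGCOMM '88 exponential averages reduce to shifts and adds. The standalone sketch below reproduces just the classic update (srtt = 7/8·srtt + 1/8·sample, mdev = 3/4·mdev + 1/4·|error|, rto = srtt + 4·mdev); it deliberately omits the extra backoff and cwnd-dependent terms the kernel folds into tcp_set_rto(), and the struct and function names are invented.

#include <stdio.h>

/* Illustrative only: fixed-point Jacobson RTT/RTO estimation. */
struct rtt_est {
	long srtt;	/* smoothed RTT, kept scaled << 3 */
	long mdev;	/* mean deviation, kept scaled << 2 */
};

static void rtt_sample(struct rtt_est *e, long m /* measured RTT, ticks */)
{
	if (m == 0)
		m = 1;
	if (e->srtt != 0) {
		m -= e->srtt >> 3;	/* error against the current estimate */
		e->srtt += m;		/* srtt = 7/8 srtt + 1/8 sample */
		if (m < 0)
			m = -m;
		m -= e->mdev >> 2;
		e->mdev += m;		/* mdev = 3/4 mdev + 1/4 |error| */
	} else {
		e->srtt = m << 3;	/* first sample primes the average */
		e->mdev = m << 2;	/* and the deviation estimate */
	}
}

static long rtt_rto(const struct rtt_est *e)
{
	return (e->srtt >> 3) + e->mdev;	/* srtt + 4*mdev (mdev already scaled by 4) */
}

int main(void)
{
	struct rtt_est e = { 0, 0 };
	long samples[] = { 100, 120, 80, 300 };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		rtt_sample(&e, samples[i]);
		printf("sample=%ld rto=%ld\n", samples[i], rtt_rto(&e));
	}
	return 0;
}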
*/ +extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq) +{ + if (seq == tp->rcv_nxt) + return (tp->rcv_wnd || (end_seq == seq)); + + return __tcp_sequence(tp, seq, end_seq); +} + +/* When we get a reset we do this. */ +static void tcp_reset(struct sock *sk) +{ + sk->zapped = 1; + + /* We want the right error as BSD sees it (and indeed as we do). */ + switch (sk->state) { + case TCP_SYN_SENT: + sk->err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->err = EPIPE; + break; + default: + sk->err = ECONNRESET; + }; + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + sk->state_change(sk); +} + +/* This tags the retransmission queue when SACKs arrive. */ +static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int i = nsacks; + + while(i--) { + struct sk_buff *skb = skb_peek(&sk->write_queue); + __u32 start_seq = ntohl(sp->start_seq); + __u32 end_seq = ntohl(sp->end_seq); + int fack_count = 0; + + while((skb != NULL) && + (skb != tp->send_head) && + (skb != (struct sk_buff *)&sk->write_queue)) { + /* The retransmission queue is always in order, so + * we can short-circuit the walk early. + */ + if(after(TCP_SKB_CB(skb)->seq, end_seq)) + break; + + /* We play conservative, we don't allow SACKS to partially + * tag a sequence space. + */ + fack_count++; + if(!after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq)) { + /* If this was a retransmitted frame, account for it. */ + if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) && + tp->retrans_out) + tp->retrans_out--; + TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; + + /* RULE: All new SACKs will either decrease retrans_out + * or advance fackets_out. + */ + if(fack_count > tp->fackets_out) + tp->fackets_out = fack_count; + } + skb = skb->next; + } + sp++; /* Move on to the next SACK block. */ + } +} + +/* Look for tcp options. Normally only called on SYN and SYNACK packets. + * But, this can also be called on packets in the established flow when + * the fast version below fails. 
+ */ +void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy) +{ + unsigned char *ptr; + int length=(th->doff*4)-sizeof(struct tcphdr); + int saw_mss = 0; + + ptr = (unsigned char *)(th + 1); + tp->saw_tstamp = 0; + + while(length>0) { + int opcode=*ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + switch(opcode) { + case TCPOPT_MSS: + if(opsize==TCPOLEN_MSS && th->syn) { + u16 in_mss = ntohs(*(__u16 *)ptr); + if (in_mss == 0) + in_mss = 536; + if (tp->mss_clamp > in_mss) + tp->mss_clamp = in_mss; + saw_mss = 1; + } + break; + case TCPOPT_WINDOW: + if(opsize==TCPOLEN_WINDOW && th->syn) + if (!no_fancy && sysctl_tcp_window_scaling) { + tp->wscale_ok = 1; + tp->snd_wscale = *(__u8 *)ptr; + if(tp->snd_wscale > 14) { + if(net_ratelimit()) + printk("tcp_parse_options: Illegal window " + "scaling value %d >14 received.", + tp->snd_wscale); + tp->snd_wscale = 14; + } + } + break; + case TCPOPT_TIMESTAMP: + if(opsize==TCPOLEN_TIMESTAMP) { + if (sysctl_tcp_timestamps && !no_fancy) { + tp->tstamp_ok = 1; + tp->saw_tstamp = 1; + tp->rcv_tsval = ntohl(*(__u32 *)ptr); + tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); + } + } + break; + case TCPOPT_SACK_PERM: + if(opsize==TCPOLEN_SACK_PERM && th->syn) { + if (sysctl_tcp_sack && !no_fancy) { + tp->sack_ok = 1; + tp->num_sacks = 0; + } + } + break; + + case TCPOPT_SACK: + if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && + sysctl_tcp_sack && (sk != NULL) && !th->syn) { + int sack_bytes = opsize - TCPOLEN_SACK_BASE; + + if(!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) { + int num_sacks = sack_bytes >> 3; + struct tcp_sack_block *sackp; + + sackp = (struct tcp_sack_block *)ptr; + tcp_sacktag_write_queue(sk, sackp, num_sacks); + } + } + }; + ptr+=opsize-2; + length-=opsize; + }; + } + if(th->syn && saw_mss == 0) + tp->mss_clamp = 536; +} + +/* Fast parse options. This hopes to only see timestamps. + * If it is wrong it falls back on tcp_parse_options(). + */ +static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp) +{ + /* If we didn't send out any options ignore them all. */ + if (tp->tcp_header_len == sizeof(struct tcphdr)) + return 0; + if (th->doff == sizeof(struct tcphdr)>>2) { + tp->saw_tstamp = 0; + return 0; + } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + __u32 *ptr = (__u32 *)(th + 1); + if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + tp->saw_tstamp = 1; + tp->rcv_tsval = ntohl(*++ptr); + tp->rcv_tsecr = ntohl(*++ptr); + return 1; + } + } + tcp_parse_options(sk, th, tp, 0); + return 1; +} + +#define FLAG_DATA 0x01 /* Incoming frame contained data. */ +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ +#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ + +static __inline__ void clear_fast_retransmit(struct tcp_opt *tp) +{ + if (tp->dup_acks > 3) + tp->snd_cwnd = (tp->snd_ssthresh); + + tp->dup_acks = 0; +} + +/* NOTE: This code assumes that tp->dup_acks gets cleared when a + * retransmit timer fires. 
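tcp_fast_parse_options() above avoids the general option walk in the common case by comparing the first 32-bit option word against the "NOP, NOP, TIMESTAMP, length 10" pattern that a timestamp-only header carries. A small userland sketch of that single-word check; the option macros mirror the kernel's names, while is_aligned_timestamp_word() and the test harness are invented for illustration.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl */

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

/* Illustrative only: recognise the aligned RFC 1323 timestamp option word. */
static int is_aligned_timestamp_word(uint32_t first_option_word_net)
{
	uint32_t expected = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);

	return first_option_word_net == expected;
}

int main(void)
{
	/* Build the 12-byte option area the way a cooperating sender would;
	 * the remaining 8 bytes would hold TSval and TSecr.
	 */
	unsigned char opts[12] = { TCPOPT_NOP, TCPOPT_NOP,
				   TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP };
	uint32_t word;

	memcpy(&word, opts, sizeof(word));
	printf("%d\n", is_aligned_timestamp_word(word));	/* 1 */
	return 0;
}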
+ */ +static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Note: If not_dup is set this implies we got a + * data carrying packet or a window update. + * This carries no new information about possible + * lost packets, so we have to ignore it for the purposes + * of counting duplicate acks. Ideally this does not imply we + * should stop our fast retransmit phase, more acks may come + * later without data to help us. Unfortunately this would make + * the code below much more complex. For now if I see such + * a packet I clear the fast retransmit phase. + */ + if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) { + /* This is the standard reno style fast retransmit branch. */ + + /* 1. When the third duplicate ack is received, set ssthresh + * to one half the current congestion window, but no less + * than two segments. Retransmit the missing segment. + */ + if (tp->high_seq == 0 || after(ack, tp->high_seq)) { + tp->dup_acks++; + if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) { + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_cwnd = (tp->snd_ssthresh + 3); + tp->high_seq = tp->snd_nxt; + if(!tp->fackets_out) + tcp_retransmit_skb(sk, + skb_peek(&sk->write_queue)); + else + tcp_fack_retransmit(sk); + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } + } else if (++tp->dup_acks > 3) { + /* 2. Each time another duplicate ACK arrives, increment + * cwnd by the segment size. [...] Transmit a packet... + * + * Packet transmission will be done on normal flow processing + * since we're not in "retransmit mode". We do not use + * duplicate ACKs to artificially inflate the congestion + * window when doing FACK. + */ + if(!tp->fackets_out) { + tp->snd_cwnd++; + } else { + /* Fill any further holes which may have + * appeared. + * + * We may want to change this to run every + * further multiple-of-3 dup ack increments, + * to be more robust against out-of-order + * packet delivery. -DaveM + */ + tcp_fack_retransmit(sk); + } + } + } else if (tp->high_seq != 0) { + /* In this branch we deal with clearing the Floyd style + * block on duplicate fast retransmits, and if requested + * we do Hoe style secondary fast retransmits. + */ + if (!before(ack, tp->high_seq) || (not_dup & FLAG_DATA) != 0) { + /* Once we have acked all the packets up to high_seq + * we are done this fast retransmit phase. + * Alternatively data arrived. In this case we + * Have to abort the fast retransmit attempt. + * Note that we do want to accept a window + * update since this is expected with Hoe's algorithm. + */ + clear_fast_retransmit(tp); + + /* After we have cleared up to high_seq we can + * clear the Floyd style block. + */ + if (!before(ack, tp->high_seq)) { + tp->high_seq = 0; + tp->fackets_out = 0; + } + } else if (tp->dup_acks >= 3) { + if (!tp->fackets_out) { + /* Hoe Style. We didn't ack the whole + * window. Take this as a cue that + * another packet was lost and retransmit it. + * Don't muck with the congestion window here. + * Note that we have to be careful not to + * act if this was a window update and it + * didn't ack new data, since this does + * not indicate a packet left the system. + * We can test this by just checking + * if ack changed from snd_una, since + * the only way to get here without advancing + * from snd_una is if this was a window update. 
+ */ + if (ack != tp->snd_una && before(ack, tp->high_seq)) { + tcp_retransmit_skb(sk, + skb_peek(&sk->write_queue)); + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } + } else { + /* FACK style, fill any remaining holes in + * receiver's queue. + */ + tcp_fack_retransmit(sk); + } + } + } +} + +/* This is Jacobson's slow start and congestion avoidance. + * SIGCOMM '88, p. 328. + */ +static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) +{ + if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* In "safe" area, increase. */ + tp->snd_cwnd++; + } else { + /* In dangerous area, increase slowly. + * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd++; + tp->snd_cwnd_cnt=0; + } else + tp->snd_cwnd_cnt++; + } +} + +/* Remove acknowledged frames from the retransmission queue. */ +static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, + __u32 *seq, __u32 *seq_rtt) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + __u32 now = tcp_time_stamp; + int acked = 0; + + /* If we are retransmitting, and this ACK clears up to + * the retransmit head, or further, then clear our state. + */ + if (tp->retrans_head != NULL && + !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq)) + tp->retrans_head = NULL; + + while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + __u8 sacked = scb->sacked; + + /* If our packet is before the ack sequence we can + * discard it as it's confirmed to have arrived at + * the other end. + */ + if (after(scb->end_seq, ack)) + break; + + /* Initial outgoing SYN's get put onto the write_queue + * just like anything else we transmit. It is not + * true data, and if we misinform our callers that + * this ACK acks real data, we will erroneously exit + * connection startup slow start one packet too + * quickly. This is severely frowned upon behavior. + */ + if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out) + tp->retrans_out--; + if(!(scb->flags & TCPCB_FLAG_SYN)) { + acked |= FLAG_DATA_ACKED; + if(sacked & TCPCB_SACKED_RETRANS) + acked |= FLAG_RETRANS_DATA_ACKED; + if(tp->fackets_out) + tp->fackets_out--; + } else { + /* This is pure paranoia. */ + tp->retrans_head = NULL; + } + tp->packets_out--; + *seq = scb->seq; + *seq_rtt = now - scb->when; + __skb_unlink(skb, skb->list); + kfree_skb(skb); + } + return acked; +} + +static void tcp_ack_probe(struct sock *sk, __u32 ack) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Our probe was answered. */ + tp->probes_out = 0; + + /* Was it a usable window open? */ + + /* should always be non-null */ + if (tp->send_head != NULL && + !before (ack + tp->snd_wnd, TCP_SKB_CB(tp->send_head)->end_seq)) { + tp->backoff = 0; + tp->pending = 0; + tcp_clear_xmit_timer(sk, TIME_PROBE0); + } else { + tcp_reset_xmit_timer(sk, TIME_PROBE0, + min(tp->rto << tp->backoff, 120*HZ)); + } +} + +/* Should we open up the congestion window? */ +static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag) +{ + /* Data must have been acked. */ + if ((flag & FLAG_DATA_ACKED) == 0) + return 0; + + /* Some of the data acked was retransmitted somehow? */ + if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) { + /* We advance in all cases except during + * non-FACK fast retransmit/recovery. + */ + if (tp->fackets_out != 0 || + tp->retransmits != 0) + return 1; + + /* Non-FACK fast retransmit does it's own + * congestion window management, don't get + * in the way. 
+ */ + return 0; + } + + /* New non-retransmitted data acked, always advance. */ + return 1; +} + +/* Read draft-ietf-tcplw-high-performance before mucking + * with this code. (Superceeds RFC1323) + */ +static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp, + u32 seq, u32 ack, int flag) +{ + __u32 seq_rtt; + + /* RTTM Rule: A TSecr value received in a segment is used to + * update the averaged RTT measurement only if the segment + * acknowledges some new data, i.e., only if it advances the + * left edge of the send window. + * + * See draft-ietf-tcplw-high-performance-00, section 3.3. + * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> + */ + if (!(flag & FLAG_DATA_ACKED)) + return; + + seq_rtt = tcp_time_stamp - tp->rcv_tsecr; + tcp_rtt_estimator(tp, seq_rtt); + if (tp->retransmits) { + if (tp->packets_out == 0) { + tp->retransmits = 0; + tp->fackets_out = 0; + tp->retrans_out = 0; + tp->backoff = 0; + tcp_set_rto(tp); + } else { + /* Still retransmitting, use backoff */ + tcp_set_rto(tp); + tp->rto = tp->rto << tp->backoff; + } + } else { + tcp_set_rto(tp); + } + + tcp_bound_rto(tp); +} + +static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) +{ + struct sk_buff *skb = skb_peek(&sk->write_queue); + + /* Some data was ACK'd, if still retransmitting (due to a + * timeout), resend more of the retransmit queue. The + * congestion window is handled properly by that code. + */ + if (tp->retransmits) { + tcp_xmit_retransmit_queue(sk); + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } else { + __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when); + if ((__s32)when < 0) + when = 1; + tcp_reset_xmit_timer(sk, TIME_RETRANS, when); + } +} + +/* This routine deals with incoming acks, but not outgoing ones. */ +static int tcp_ack(struct sock *sk, struct tcphdr *th, + u32 ack_seq, u32 ack, int len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int flag = 0; + u32 seq = 0; + u32 seq_rtt = 0; + + if(sk->zapped) + return(1); /* Dead, can't ack any more so why bother */ + + if (tp->pending == TIME_KEEPOPEN) + tp->probes_out = 0; + + tp->rcv_tstamp = tcp_time_stamp; + + /* If the ack is newer than sent or older than previous acks + * then we can probably ignore it. + */ + if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una)) + goto uninteresting_ack; + + /* If there is data set flag 1 */ + if (len != th->doff*4) { + flag |= FLAG_DATA; + tcp_delack_estimator(tp); + } + + /* Update our send window. */ + + /* This is the window update code as per RFC 793 + * snd_wl{1,2} are used to prevent unordered + * segments from shrinking the window + */ + if (before(tp->snd_wl1, ack_seq) || + (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) { + u32 nwin = ntohs(th->window) << tp->snd_wscale; + + if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) { + flag |= FLAG_WIN_UPDATE; + tp->snd_wnd = nwin; + + tp->snd_wl1 = ack_seq; + tp->snd_wl2 = ack; + + if (nwin > tp->max_window) + tp->max_window = nwin; + } + } + + /* We passed data and got it acked, remove any soft error + * log. Something worked... + */ + sk->err_soft = 0; + + /* If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. + */ + if (tp->pending == TIME_PROBE0) + tcp_ack_probe(sk, ack); + + /* See if we can take anything off of the retransmit queue. 
*/ + flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt); + + /* We must do this here, before code below clears out important + * state contained in tp->fackets_out and tp->retransmits. -DaveM + */ + if (should_advance_cwnd(tp, flag)) + tcp_cong_avoid(tp); + + /* If we have a timestamp, we always do rtt estimates. */ + if (tp->saw_tstamp) { + tcp_ack_saw_tstamp(sk, tp, seq, ack, flag); + } else { + /* If we were retransmiting don't count rtt estimate. */ + if (tp->retransmits) { + if (tp->packets_out == 0) { + tp->retransmits = 0; + tp->fackets_out = 0; + tp->retrans_out = 0; + } + } else { + /* We don't have a timestamp. Can only use + * packets that are not retransmitted to determine + * rtt estimates. Also, we must not reset the + * backoff for rto until we get a non-retransmitted + * packet. This allows us to deal with a situation + * where the network delay has increased suddenly. + * I.e. Karn's algorithm. (SIGCOMM '87, p5.) + */ + if (flag & FLAG_DATA_ACKED) { + if(!(flag & FLAG_RETRANS_DATA_ACKED)) { + tp->backoff = 0; + tcp_rtt_estimator(tp, seq_rtt); + tcp_set_rto(tp); + tcp_bound_rto(tp); + } + } + } + } + + if (tp->packets_out) { + if (flag & FLAG_DATA_ACKED) + tcp_ack_packets_out(sk, tp); + } else { + tcp_clear_xmit_timer(sk, TIME_RETRANS); + } + + flag &= (FLAG_DATA | FLAG_WIN_UPDATE); + if ((ack == tp->snd_una && tp->packets_out && flag == 0) || + (tp->high_seq != 0)) { + tcp_fast_retrans(sk, ack, flag); + } else { + /* Clear any aborted fast retransmit starts. */ + tp->dup_acks = 0; + } + /* It is not a brain fart, I thought a bit now. 8) + * + * Forward progress is indicated, if: + * 1. the ack acknowledges new data. + * 2. or the ack is duplicate, but it is caused by new segment + * arrival. This case is filtered by: + * - it contains no data, syn or fin. + * - it does not update window. + * 3. or new SACK. It is difficult to check, so that we ignore it. + * + * Forward progress is also indicated by arrival new data, + * which was caused by window open from our side. This case is more + * difficult and it is made (alas, incorrectly) in tcp_data_queue(). + * --ANK (990513) + */ + if (ack != tp->snd_una || (flag == 0 && !th->fin)) + dst_confirm(sk->dst_cache); + + /* Remember the highest ack received. */ + tp->snd_una = ack; + return 1; + +uninteresting_ack: + SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt); + return 0; +} + +/* New-style handling of TIME_WAIT sockets. */ +extern void tcp_tw_schedule(struct tcp_tw_bucket *tw); +extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw); +extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); + +void tcp_timewait_kill(struct tcp_tw_bucket *tw) +{ + struct tcp_bind_bucket *tb = tw->tb; + + /* Disassociate with bind bucket. */ + if(tw->bind_next) + tw->bind_next->bind_pprev = tw->bind_pprev; + *(tw->bind_pprev) = tw->bind_next; + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); + } + + /* Unlink from established hashes. */ + if(tw->next) + tw->next->pprev = tw->pprev; + *tw->pprev = tw->next; + + /* We decremented the prot->inuse count when we entered TIME_WAIT + * and the sock from which this came was destroyed. + */ + tw->sklist_next->sklist_prev = tw->sklist_prev; + tw->sklist_prev->sklist_next = tw->sklist_next; + + /* Ok, now free it up. */ + kmem_cache_free(tcp_timewait_cachep, tw); +} + +/* We come here as a special case from the AF specific TCP input processing, + * and the SKB has no owner. 
Essentially handling this is very simple, + * we just keep silently eating rx'd packets, acking them if necessary, + * until none show up for the entire timeout period. + * + * Return 0, TCP_TW_ACK, TCP_TW_RST + */ +enum tcp_tw_status +tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ + /* RFC 1122: + * "When a connection is [...] on TIME-WAIT state [...] + * [a TCP] MAY accept a new SYN from the remote TCP to + * reopen the connection directly, if it: + * + * (1) assigns its initial sequence number for the new + * connection to be larger than the largest sequence + * number it used on the previous connection incarnation, + * and + * + * (2) returns to TIME-WAIT state if the SYN turns out + * to be an old duplicate". + */ + if(th->syn && !th->rst && after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt)) { + struct sock *sk; + struct tcp_func *af_specific = tw->af_specific; + __u32 isn; + + isn = tw->snd_nxt + 128000; + if(isn == 0) + isn++; + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + sk = af_specific->get_sock(skb, th); + if(sk == NULL || + !ipsec_sk_policy(sk,skb) || + atomic_read(&sk->sock_readers) != 0) + return 0; + skb_set_owner_r(skb, sk); + af_specific = sk->tp_pinfo.af_tcp.af_specific; + if(af_specific->conn_request(sk, skb, isn) < 0) + return TCP_TW_RST; /* Toss a reset back. */ + return 0; /* Discard the frame. */ + } + + /* Check RST or SYN */ + if(th->rst || th->syn) { + /* This is TIME_WAIT assasination, in two flavors. + * Oh well... nobody has a sufficient solution to this + * protocol bug yet. + */ + if(sysctl_tcp_rfc1337 == 0) { + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + } + if(!th->rst) + return TCP_TW_RST; /* toss a reset back */ + return 0; + } else { + /* In this case we must reset the TIMEWAIT timer. */ + if(th->ack) + tcp_tw_reschedule(tw); + } + /* Ack old packets if necessary */ + if (!after(TCP_SKB_CB(skb)->end_seq, tw->rcv_nxt) && + (th->doff * 4) > len) + return TCP_TW_ACK; + return 0; +} + +/* Enter the time wait state. This is always called from BH + * context. Essentially we whip up a timewait bucket, copy the + * relevant info into it from the SK, and mess with hash chains + * and list linkage. + */ +static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) +{ + struct sock **head, *sktw; + + /* Step 1: Remove SK from established hash. */ + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + tcp_reg_zap(sk); + + /* Step 2: Put TW into bind hash where SK was. */ + tw->tb = (struct tcp_bind_bucket *)sk->prev; + if((tw->bind_next = sk->bind_next) != NULL) + sk->bind_next->bind_pprev = &tw->bind_next; + tw->bind_pprev = sk->bind_pprev; + *sk->bind_pprev = (struct sock *)tw; + sk->prev = NULL; + + /* Step 3: Same for the protocol sklist. */ + (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw; + (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw; + sk->sklist_next = NULL; + sk->prot->inuse--; + + /* Step 4: Hash TW into TIMEWAIT half of established hash table. */ + head = &tcp_established_hash[sk->hashent + (TCP_HTABLE_SIZE/2)]; + sktw = (struct sock *)tw; + if((sktw->next = *head) != NULL) + (*head)->pprev = &sktw->next; + *head = sktw; + sktw->pprev = head; +} + +void tcp_time_wait(struct sock *sk) +{ + struct tcp_tw_bucket *tw; + + tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); + if(tw != NULL) { + /* Give us an identity. 
*/ + tw->daddr = sk->daddr; + tw->rcv_saddr = sk->rcv_saddr; + tw->bound_dev_if= sk->bound_dev_if; + tw->num = sk->num; + tw->state = TCP_TIME_WAIT; + tw->sport = sk->sport; + tw->dport = sk->dport; + tw->family = sk->family; + tw->reuse = sk->reuse; + tw->rcv_nxt = sk->tp_pinfo.af_tcp.rcv_nxt; + tw->snd_nxt = sk->tp_pinfo.af_tcp.snd_nxt; + tw->window = tcp_select_window(sk); + tw->af_specific = sk->tp_pinfo.af_tcp.af_specific; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if(tw->family == PF_INET6) { + memcpy(&tw->v6_daddr, + &sk->net_pinfo.af_inet6.daddr, + sizeof(struct in6_addr)); + memcpy(&tw->v6_rcv_saddr, + &sk->net_pinfo.af_inet6.rcv_saddr, + sizeof(struct in6_addr)); + } +#endif + /* Linkage updates. */ + tcp_tw_hashdance(sk, tw); + + /* Get the TIME_WAIT timeout firing. */ + tcp_tw_schedule(tw); + + /* CLOSE the SK. */ + if(sk->state == TCP_ESTABLISHED) + tcp_statistics.TcpCurrEstab--; + sk->state = TCP_CLOSE; + net_reset_timer(sk, TIME_DONE, + min(sk->tp_pinfo.af_tcp.srtt * 2, TCP_DONE_TIME)); + } else { + /* Sorry, we're out of memory, just CLOSE this + * socket up. We've got bigger problems than + * non-graceful socket closings. + */ + tcp_set_state(sk, TCP_CLOSE); + } + + /* Prevent rcvmsg/sndmsg calls, and wake people up. */ + sk->shutdown = SHUTDOWN_MASK; + if(!sk->dead) + sk->state_change(sk); +} + +/* + * Process the FIN bit. This now behaves as it is supposed to work + * and the FIN takes effect when it is validly part of sequence + * space. Not before when we get holes. + * + * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT + * (and thence onto LAST-ACK and finally, CLOSE, we never enter + * TIME-WAIT) + * + * If we are in FINWAIT-1, a received FIN indicates simultaneous + * close and we go into CLOSING (and later onto TIME-WAIT) + * + * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. + */ + +static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +{ + sk->tp_pinfo.af_tcp.fin_seq = TCP_SKB_CB(skb)->end_seq; + + tcp_send_ack(sk); + + if (!sk->dead) { + sk->state_change(sk); + sock_wake_async(sk->socket, 1); + } + + switch(sk->state) { + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + /* Move to CLOSE_WAIT */ + tcp_set_state(sk, TCP_CLOSE_WAIT); + if (th->rst) + sk->shutdown = SHUTDOWN_MASK; + break; + + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + /* Received a retransmission of the FIN, do + * nothing. + */ + break; + case TCP_LAST_ACK: + /* RFC793: Remain in the LAST-ACK state. */ + break; + + case TCP_FIN_WAIT1: + /* This case occurs when a simultaneous close + * happens, we must ack the received FIN and + * enter the CLOSING state. + * + * This causes a WRITE timeout, which will either + * move on to TIME_WAIT when we timeout, or resend + * the FIN properly (maybe we get rid of that annoying + * FIN lost hang). The TIME_WRITE code is already + * correct for handling this timeout. + */ + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + /* Received a FIN -- send ACK and enter TIME_WAIT. */ + tcp_time_wait(sk); + break; + default: + /* Only TCP_LISTEN and TCP_CLOSE are left, in these + * cases we should never reach this piece of code. + */ + printk("tcp_fin: Impossible, sk->state=%d\n", sk->state); + break; + }; +} + +/* These routines update the SACK block as out-of-order packets arrive or + * in-order packets close up the sequence space. 
+ */ +static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp) +{ + int this_sack, num_sacks = tp->num_sacks; + struct tcp_sack_block *swalk = &tp->selective_acks[0]; + + /* If more than one SACK block, see if the recent change to SP eats into + * or hits the sequence space of other SACK blocks, if so coalesce. + */ + if(num_sacks != 1) { + for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) { + if(swalk == sp) + continue; + + /* First case, bottom of SP moves into top of the + * sequence space of SWALK. + */ + if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) { + sp->start_seq = swalk->start_seq; + goto coalesce; + } + /* Second case, top of SP moves into bottom of the + * sequence space of SWALK. + */ + if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) { + sp->end_seq = swalk->end_seq; + goto coalesce; + } + } + } + /* SP is the only SACK, or no coalescing cases found. */ + return; + +coalesce: + /* Zap SWALK, by moving every further SACK up by one slot. + * Decrease num_sacks. + */ + for(; this_sack < num_sacks-1; this_sack++, swalk++) { + struct tcp_sack_block *next = (swalk + 1); + swalk->start_seq = next->start_seq; + swalk->end_seq = next->end_seq; + } + tp->num_sacks--; +} + +static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) +{ + __u32 tmp; + + tmp = sack1->start_seq; + sack1->start_seq = sack2->start_seq; + sack2->start_seq = tmp; + + tmp = sack1->end_seq; + sack1->end_seq = sack2->end_seq; + sack2->end_seq = tmp; +} + +static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_sack_block *sp = &tp->selective_acks[0]; + int cur_sacks = tp->num_sacks; + + if (!cur_sacks) + goto new_sack; + + /* Optimize for the common case, new ofo frames arrive + * "in order". ;-) This also satisfies the requirements + * of RFC2018 about ordering of SACKs. + */ + if(sp->end_seq == TCP_SKB_CB(skb)->seq) { + sp->end_seq = TCP_SKB_CB(skb)->end_seq; + tcp_sack_maybe_coalesce(tp, sp); + } else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) { + /* Re-ordered arrival, in this case, can be optimized + * as well. + */ + sp->start_seq = TCP_SKB_CB(skb)->seq; + tcp_sack_maybe_coalesce(tp, sp); + } else { + struct tcp_sack_block *swap = sp + 1; + int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4); + + /* Oh well, we have to move things around. + * Try to find a SACK we can tack this onto. + */ + + for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) { + if((swap->end_seq == TCP_SKB_CB(skb)->seq) || + (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) { + if(swap->end_seq == TCP_SKB_CB(skb)->seq) + swap->end_seq = TCP_SKB_CB(skb)->end_seq; + else + swap->start_seq = TCP_SKB_CB(skb)->seq; + tcp_sack_swap(sp, swap); + tcp_sack_maybe_coalesce(tp, sp); + return; + } + } + + /* Could not find an adjacent existing SACK, build a new one, + * put it at the front, and shift everyone else down. We + * always know there is at least one SACK present already here. + * + * If the sack array is full, forget about the last one. + */ + if (cur_sacks >= max_sacks) { + cur_sacks--; + tp->num_sacks--; + } + while(cur_sacks >= 1) { + struct tcp_sack_block *this = &tp->selective_acks[cur_sacks]; + struct tcp_sack_block *prev = (this - 1); + this->start_seq = prev->start_seq; + this->end_seq = prev->end_seq; + cur_sacks--; + } + + new_sack: + /* Build the new head SACK, and we're done. 
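tcp_sack_new_ofo_skb() and tcp_sack_maybe_coalesce() above maintain the SACK blocks as a small set of sequence intervals: a new out-of-order segment usually extends an existing block, and blocks that come to touch each other are merged. The sketch below captures that interval bookkeeping in a deliberately simplified form: it always grows the head block, ignores sequence wraparound (the kernel uses before()/after()) and ignores RFC 2018's most-recent-first ordering. All names are invented.

#include <stdio.h>
#include <stdint.h>

#define MAX_BLOCKS 4

struct sack_block { uint32_t start, end; };

struct sack_state {
	struct sack_block blk[MAX_BLOCKS];
	int num;
};

/* Illustrative only: record an out-of-order segment [seq, end_seq). */
static void sack_note_segment(struct sack_state *s, uint32_t seq, uint32_t end_seq)
{
	int i;

	if (s->num && s->blk[0].end == seq) {
		s->blk[0].end = end_seq;		/* common case: grows the head block */
	} else if (s->num && s->blk[0].start == end_seq) {
		s->blk[0].start = seq;			/* reordered arrival just below it */
	} else if (s->num < MAX_BLOCKS) {
		s->blk[s->num].start = seq;		/* open a new block */
		s->blk[s->num].end = end_seq;
		s->num++;
	}

	/* Merge the head block with any block it now touches. */
	for (i = 1; i < s->num; i++) {
		if (s->blk[0].end >= s->blk[i].start && s->blk[0].start <= s->blk[i].end) {
			if (s->blk[i].start < s->blk[0].start)
				s->blk[0].start = s->blk[i].start;
			if (s->blk[i].end > s->blk[0].end)
				s->blk[0].end = s->blk[i].end;
			s->blk[i] = s->blk[--s->num];	/* drop the swallowed block */
			i--;
		}
	}
}

int main(void)
{
	struct sack_state s = { { { 0, 0 } }, 0 };
	int i;

	sack_note_segment(&s, 3000, 4000);	/* hole below 3000 */
	sack_note_segment(&s, 4000, 5000);	/* extends the head block */
	sack_note_segment(&s, 1000, 2000);	/* separate block */
	sack_note_segment(&s, 2000, 3000);	/* bridges them: a single block 1000-5000 */

	for (i = 0; i < s.num; i++)
		printf("block %d: %u-%u\n", i, s.blk[i].start, s.blk[i].end);
	return 0;
}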
*/ + sp->start_seq = TCP_SKB_CB(skb)->seq; + sp->end_seq = TCP_SKB_CB(skb)->end_seq; + tp->num_sacks++; + } +} + +static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb) +{ + struct tcp_sack_block *sp = &tp->selective_acks[0]; + int num_sacks = tp->num_sacks; + int this_sack; + + /* This is an in order data segment _or_ an out-of-order SKB being + * moved to the receive queue, so we know this removed SKB will eat + * from the front of a SACK. + */ + for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) { + /* Check if the start of the sack is covered by skb. */ + if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) && + before(sp->start_seq, TCP_SKB_CB(skb)->end_seq)) + break; + } + + /* This should only happen if so many SACKs get built that some get + * pushed out before we get here, or we eat some in sequence packets + * which are before the first SACK block. + */ + if(this_sack >= num_sacks) + return; + + sp->start_seq = TCP_SKB_CB(skb)->end_seq; + if(!before(sp->start_seq, sp->end_seq)) { + /* Zap this SACK, by moving forward any other SACKS. */ + for(this_sack += 1; this_sack < num_sacks; this_sack++, sp++) { + struct tcp_sack_block *next = (sp + 1); + sp->start_seq = next->start_seq; + sp->end_seq = next->end_seq; + } + tp->num_sacks--; + } +} + +static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct sk_buff *new_skb) +{ + struct tcp_sack_block *sp = &tp->selective_acks[0]; + int num_sacks = tp->num_sacks; + int this_sack; + + for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) { + if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq) + break; + } + if(this_sack >= num_sacks) + return; + sp->end_seq = TCP_SKB_CB(new_skb)->end_seq; +} + +/* This one checks to see if we can put data from the + * out_of_order queue into the receive_queue. + */ +static void tcp_ofo_queue(struct sock *sk) +{ + struct sk_buff *skb; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + while ((skb = skb_peek(&tp->out_of_order_queue))) { + if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) + break; + + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + SOCK_DEBUG(sk, "ofo packet was already received \n"); + __skb_unlink(skb, skb->list); + kfree_skb(skb); + continue; + } + SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + if(tp->sack_ok) + tcp_sack_remove_skb(tp, skb); + __skb_unlink(skb, skb->list); + __skb_queue_tail(&sk->receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + if(skb->h.th->fin) + tcp_fin(skb, sk, skb->h.th); + } +} + +static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *skb1; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Queue data for delivery to the user. + * Packets in sequence go to the receive queue. + * Out of sequence packets to the out_of_order_queue. + */ + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + /* Ok. In sequence. */ + queue_and_out: + dst_confirm(sk->dst_cache); + __skb_queue_tail(&sk->receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + if(skb->h.th->fin) { + tcp_fin(skb, sk, skb->h.th); + } else { + tcp_remember_ack(tp, skb->h.th, skb); + } + /* This may have eaten into a SACK block. */ + if(tp->sack_ok && tp->num_sacks) + tcp_sack_remove_skb(tp, skb); + tcp_ofo_queue(sk); + + /* Turn on fast path. 
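tcp_ofo_queue() above drains the out-of-order queue whenever rcv_nxt catches up with its first segment, advancing rcv_nxt past each segment it moves into the receive queue and discarding stale duplicates. A compact array-based sketch of that loop; sk_buff queues, SACK bookkeeping and sequence wraparound are omitted, and the names are invented.

#include <stdio.h>
#include <stdint.h>

struct seg { uint32_t seq, end_seq; };

/* Illustrative only: drain queued out-of-order segments that have become
 * contiguous with the in-order stream; returns the new rcv_nxt.
 * The ofo array is assumed sorted by sequence number.
 */
static uint32_t drain_ofo(uint32_t rcv_nxt, struct seg *ofo, int *n_ofo)
{
	while (*n_ofo > 0 && ofo[0].seq <= rcv_nxt) {
		int i;

		if (ofo[0].end_seq > rcv_nxt)	/* not a stale duplicate */
			rcv_nxt = ofo[0].end_seq;	/* deliver it, advance the edge */

		for (i = 1; i < *n_ofo; i++)	/* pop the head */
			ofo[i - 1] = ofo[i];
		(*n_ofo)--;
	}
	return rcv_nxt;
}

int main(void)
{
	struct seg ofo[] = { { 2000, 3000 }, { 3000, 4000 }, { 5000, 6000 } };
	int n = 3;
	uint32_t rcv_nxt = 2000;	/* the missing 1000-2000 segment just arrived */

	rcv_nxt = drain_ofo(rcv_nxt, ofo, &n);
	printf("rcv_nxt=%u, %d segment(s) still out of order\n", rcv_nxt, n);
	return 0;
}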
*/ + if (skb_queue_len(&tp->out_of_order_queue) == 0) + tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) | + (0x10 << 16) | + tp->snd_wnd); + return; + } + + /* An old packet, either a retransmit or some packet got lost. */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + /* A retransmit, 2nd most common case. Force an imediate ack. */ + SOCK_DEBUG(sk, "retransmit received: seq %X\n", TCP_SKB_CB(skb)->seq); + tcp_enter_quickack_mode(tp); + kfree_skb(skb); + return; + } + + if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + /* Partial packet, seq < rcv_next < end_seq */ + SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + goto queue_and_out; + } + + /* Ok. This is an out_of_order segment, force an ack. */ + tp->delayed_acks++; + tcp_enter_quickack_mode(tp); + + /* Disable header prediction. */ + tp->pred_flags = 0; + + SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + + if (skb_peek(&tp->out_of_order_queue) == NULL) { + /* Initial out of order segment, build 1 SACK. */ + if(tp->sack_ok) { + tp->num_sacks = 1; + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; + tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; + } + __skb_queue_head(&tp->out_of_order_queue,skb); + } else { + for(skb1=tp->out_of_order_queue.prev; ; skb1 = skb1->prev) { + /* Already there. */ + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb1)->seq) { + if (skb->len >= skb1->len) { + if(tp->sack_ok) + tcp_sack_extend(tp, skb1, skb); + __skb_append(skb1, skb); + __skb_unlink(skb1, skb1->list); + kfree_skb(skb1); + } else { + /* A duplicate, smaller than what is in the + * out-of-order queue right now, toss it. + */ + kfree_skb(skb); + } + break; + } + + if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) { + __skb_append(skb1, skb); + if(tp->sack_ok) + tcp_sack_new_ofo_skb(sk, skb); + break; + } + + /* See if we've hit the start. If so insert. */ + if (skb1 == skb_peek(&tp->out_of_order_queue)) { + __skb_queue_head(&tp->out_of_order_queue,skb); + if(tp->sack_ok) + tcp_sack_new_ofo_skb(sk, skb); + break; + } + } + } +} + + +/* + * This routine handles the data. If there is room in the buffer, + * it will be have already been moved into it. If there is no + * room, then we will just have to discard the packet. + */ + +static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len) +{ + struct tcphdr *th; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + th = skb->h.th; + skb_pull(skb, th->doff*4); + skb_trim(skb, len - (th->doff*4)); + + if (skb->len == 0 && !th->fin) + return(0); + + /* + * If our receive queue has grown past its limits shrink it. + * Make sure to do this before moving snd_nxt, otherwise + * data might be acked for that we don't have enough room. + */ + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { + if (prune_queue(sk) < 0) { + /* Still not enough room. That can happen when + * skb->true_size differs significantly from skb->len. + */ + return 0; + } + } + + tcp_data_queue(sk, skb); + + if (before(tp->rcv_nxt, tp->copied_seq)) { + printk(KERN_DEBUG "*** tcp.c:tcp_data bug acked < copied\n"); + tp->rcv_nxt = tp->copied_seq; + } + + /* Above, tcp_data_queue() increments delayed_acks appropriately. + * Now tell the user we may have some data. 
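+	 *
+	 * All of the queueing decisions above rely on wraparound-safe 32-bit
+	 * sequence comparisons. The before()/after()/between() helpers used
+	 * here follow the usual signed-difference pattern; a standalone
+	 * sketch (renamed seq_* so as not to restate the kernel's macros):
+	 *
+	 *   #include <stdint.h>
+	 *
+	 *   // Ordering stays correct across the 2^32 wrap as long as the
+	 *   // two values are within 2^31 of each other.
+	 *   static int seq_before(uint32_t a, uint32_t b)
+	 *   {
+	 *           return (int32_t)(a - b) < 0;
+	 *   }
+	 *
+	 *   static int seq_after(uint32_t a, uint32_t b)
+	 *   {
+	 *           return (int32_t)(b - a) < 0;
+	 *   }
+	 *
+	 *   // True when lo <= x <= hi, modulo 2^32.
+	 *   static int seq_between(uint32_t x, uint32_t lo, uint32_t hi)
+	 *   {
+	 *           return hi - lo >= x - lo;
+	 *   }
+	 *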
+ */ + if (!sk->dead) { + sk->data_ready(sk,0); + } + return(1); +} + +static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) && + tcp_packets_in_flight(tp) < tp->snd_cwnd) { + /* Put more data onto the wire. */ + tcp_write_xmit(sk); + } else if (tp->packets_out == 0 && !tp->pending) { + /* Start probing the receivers window. */ + tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto); + } +} + +static __inline__ void tcp_data_snd_check(struct sock *sk) +{ + struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head; + + if (skb != NULL) + __tcp_data_snd_check(sk, skb); +} + +/* + * Adapt the MSS value used to make delayed ack decision to the + * real world. + */ +static __inline__ void tcp_measure_rcv_mss(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int len = skb->len, lss; + + if (len > tp->rcv_mss) + tp->rcv_mss = len; + lss = tp->last_seg_size; + tp->last_seg_size = 0; + if (len >= 536) { + if (len == lss) + tp->rcv_mss = len; + tp->last_seg_size = len; + } +} + +/* + * Check if sending an ack is needed. + */ +static __inline__ void __tcp_ack_snd_check(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* This also takes care of updating the window. + * This if statement needs to be simplified. + * + * Rules for delaying an ack: + * - delay time <= 0.5 HZ + * - we don't have a window update to send + * - must send at least every 2 full sized packets + * - must send an ACK if we have any out of order data + * + * With an extra heuristic to handle loss of packet + * situations and also helping the sender leave slow + * start in an expediant manner. + */ + + /* Two full frames received or... */ + if (((tp->rcv_nxt - tp->rcv_wup) >= tp->rcv_mss * MAX_DELAY_ACK) || + /* We will update the window "significantly" or... */ + tcp_raise_window(sk) || + /* We entered "quick ACK" mode or... */ + tcp_in_quickack_mode(tp) || + /* We have out of order data */ + (skb_peek(&tp->out_of_order_queue) != NULL)) { + /* Then ack it now */ + tcp_send_ack(sk); + } else { + /* Else, send delayed ack. */ + tcp_send_delayed_ack(tp, HZ/2); + } +} + +static __inline__ void tcp_ack_snd_check(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (tp->delayed_acks == 0) { + /* We sent a data segment already. */ + return; + } + __tcp_ack_snd_check(sk); +} + + +/* + * This routine is only called when we have urgent data + * signalled. Its the 'slow' part of tcp_urg. It could be + * moved inline now as tcp_urg is only called from one + * place. We handle URGent data wrong. We have to - as + * BSD still doesn't use the correction from RFC961. + * For 1003.1g we should support a new option TCP_STDURG to permit + * either form (or just set the sysctl tcp_stdurg). + */ + +static void tcp_check_urg(struct sock * sk, struct tcphdr * th) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 ptr = ntohs(th->urg_ptr); + + if (ptr && !sysctl_tcp_stdurg) + ptr--; + ptr += ntohl(th->seq); + + /* Ignore urgent data that we've already seen and read. */ + if (after(tp->copied_seq, ptr)) + return; + + /* Do we already have a newer (or duplicate) urgent pointer? */ + if (tp->urg_data && !after(ptr, tp->urg_seq)) + return; + + /* Tell the world about our new urgent pointer. 
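+	 *
+	 * The pointer arithmetic above can be read in isolation as the
+	 * sketch below (host byte order; the helper name and the stdurg
+	 * flag parameter are made up for the example):
+	 *
+	 *   #include <stdint.h>
+	 *
+	 *   // Sequence number of the urgent byte. BSD historically points
+	 *   // one past the urgent byte, so the pointer is decremented
+	 *   // unless the RFC 1122 interpretation (tcp_stdurg) is requested.
+	 *   static uint32_t urgent_seq(uint32_t seg_seq, uint16_t urg_ptr,
+	 *                              int stdurg)
+	 *   {
+	 *           uint32_t ptr = urg_ptr;
+	 *
+	 *           if (ptr && !stdurg)
+	 *                   ptr--;  // compensate for the BSD off-by-one
+	 *           return seg_seq + ptr;
+	 *   }
+	 *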
*/ + if (sk->proc != 0) { + if (sk->proc > 0) + kill_proc(sk->proc, SIGURG, 1); + else + kill_pg(-sk->proc, SIGURG, 1); + } + + /* We may be adding urgent data when the last byte read was + * urgent. To do this requires some care. We cannot just ignore + * tp->copied_seq since we would read the last urgent byte again + * as data, nor can we alter copied_seq until this data arrives + * or we break the sematics of SIOCATMARK (and thus sockatmark()) + */ + if (tp->urg_seq == tp->copied_seq) + tp->copied_seq++; /* Move the copied sequence on correctly */ + tp->urg_data = URG_NOTYET; + tp->urg_seq = ptr; + + /* Disable header prediction. */ + tp->pred_flags = 0; +} + +/* This is the 'fast' part of urgent handling. */ +static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Check if we get a new urgent pointer - normally not. */ + if (th->urg) + tcp_check_urg(sk,th); + + /* Do we wait for any urgent data? - normally not... */ + if (tp->urg_data == URG_NOTYET) { + u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4); + + /* Is the urgent pointer pointing into this packet? */ + if (ptr < len) { + tp->urg_data = URG_VALID | *(ptr + (unsigned char *) th); + if (!sk->dead) + sk->data_ready(sk,0); + } + } +} + +/* Clean the out_of_order queue if we can, trying to get + * the socket within its memory limits again. + * + * Return less than zero if we should start dropping frames + * until the socket owning process reads some of the data + * to stabilize the situation. + */ +static int prune_queue(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff * skb; + + SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); + + net_statistics.PruneCalled++; + + /* First, purge the out_of_order queue. */ + skb = __skb_dequeue_tail(&tp->out_of_order_queue); + if(skb != NULL) { + /* Free it all. */ + do { net_statistics.OfoPruned += skb->len; + kfree_skb(skb); + skb = __skb_dequeue_tail(&tp->out_of_order_queue); + } while(skb != NULL); + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if(tp->sack_ok) + tp->num_sacks = 0; + } + + /* If we are really being abused, tell the caller to silently + * drop receive data on the floor. It will get retransmitted + * and hopefully then we'll have sufficient space. + * + * We used to try to purge the in-order packets too, but that + * turns out to be deadly and fraught with races. Consider: + * + * 1) If we acked the data, we absolutely cannot drop the + * packet. This data would then never be retransmitted. + * 2) It is possible, with a proper sequence of events involving + * delayed acks and backlog queue handling, to have the user + * read the data before it gets acked. The previous code + * here got this wrong, and it lead to data corruption. + * 3) Too much state changes happen when the FIN arrives, so once + * we've seen that we can't remove any in-order data safely. + * + * The net result is that removing in-order receive data is too + * complex for anyones sanity. So we don't do it anymore. But + * if we are really having our buffer space abused we stop accepting + * new receive data. + */ + if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1)) + return 0; + + /* Massive buffer overcommit. */ + return -1; +} + +/* + * TCP receive function for the ESTABLISHED state. 
+ * + * It is split into a fast path and a slow path. The fast path is + * disabled when: + * - A zero window was announced from us - zero window probing + * is only handled properly in the slow path. + * - Out of order segments arrived. + * - Urgent data is expected. + * - There is no buffer space left + * - Unexpected TCP flags/window values/header lengths are received + * (detected by checking the TCP header against pred_flags) + * - Data is sent in both directions. Fast path only supports pure senders + * or pure receivers (this means either the sequence number or the ack + * value must stay constant) + * + * When these conditions are not satisfied it drops into a standard + * receive procedure patterned after RFC793 to handle all cases. + * The first three cases are guaranteed by proper pred_flags setting, + * the rest is checked inline. Fast processing is turned on in + * tcp_data_queue when everything is OK. + */ +int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int queued; + u32 flg; + + /* + * Header prediction. + * The code follows the one in the famous + * "30 instruction TCP receive" Van Jacobson mail. + * + * Van's trick is to deposit buffers into socket queue + * on a device interrupt, to call tcp_recv function + * on the receive process context and checksum and copy + * the buffer to user space. smart... + * + * Our current scheme is not silly either but we take the + * extra cost of the net_bh soft interrupt processing... + * We do checksum and copy also but from device to kernel. + */ + + /* + * RFC1323: H1. Apply PAWS check first. + */ + if (tcp_fast_parse_options(sk, th, tp)) { + if (tp->saw_tstamp) { + if (tcp_paws_discard(tp, th, len)) { + tcp_statistics.TcpInErrs++; + if (!th->rst) { + tcp_send_ack(sk); + goto discard; + } + } + tcp_replace_ts_recent(sk, tp, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + } + } + + flg = *(((u32 *)th) + 3) & ~htonl(0xFC8 << 16); + + /* pred_flags is 0xS?10 << 16 + snd_wnd + * if header_predition is to be made + * 'S' will always be tp->tcp_header_len >> 2 + * '?' will be 0 else it will be !0 + * (when there are holes in the receive + * space for instance) + * PSH flag is ignored. + */ + + if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + if (len <= th->doff*4) { + /* Bulk data transfer: sender */ + if (len == th->doff*4) { + tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->ack_seq, len); + kfree_skb(skb); + tcp_data_snd_check(sk); + return 0; + } else { /* Header too small */ + tcp_statistics.TcpInErrs++; + goto discard; + } + } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una && + atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) { + /* Bulk data transfer: receiver */ + __skb_pull(skb,th->doff*4); + + tcp_measure_rcv_mss(sk, skb); + + /* DO NOT notify forward progress here. + * It saves dozen of CPU instructions in fast path. --ANK + */ + __skb_queue_tail(&sk->receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + + /* FIN bit check is not done since if FIN is set in + * this frame, the pred_flags won't match up. -DaveM + */ + sk->data_ready(sk, 0); + tcp_delack_estimator(tp); + + tcp_remember_ack(tp, th, skb); + + __tcp_ack_snd_check(sk); + return 0; + } + } + + /* + * Standard slow path. + */ + + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." 
+ * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST bit + * is set, if so drop the segment and return)". + */ + if (th->rst) + goto discard; + if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n", + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + tp->rcv_wup, tp->rcv_wnd); + } + tcp_send_ack(sk); + goto discard; + } + + if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) { + SOCK_DEBUG(sk, "syn in established state\n"); + tcp_statistics.TcpInErrs++; + tcp_reset(sk); + return 1; + } + + if(th->rst) { + tcp_reset(sk); + goto discard; + } + + if(th->ack) + tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len); + + /* Process urgent data. */ + tcp_urg(sk, th, len); + + /* step 7: process the segment text */ + queued = tcp_data(skb, sk, len); + + /* This must be after tcp_data() does the skb_pull() to + * remove the header size from skb->len. + * + * Dave!!! Phrase above (and all about rcv_mss) has + * nothing to do with reality. rcv_mss must measure TOTAL + * size, including sacks, IP options etc. Hence, measure_rcv_mss + * must occure before pulling etc, otherwise it will flap + * like hell. Even putting it before tcp_data is wrong, + * it should use skb->tail - skb->nh.raw instead. + * --ANK (980805) + * + * BTW I broke it. Now all TCP options are handled equally + * in mss_clamp calculations (i.e. ignored, rfc1122), + * and mss_cache does include all of them (i.e. tstamps) + * except for sacks, to calulate effective mss faster. + * --ANK (980805) + */ + tcp_measure_rcv_mss(sk, skb); + + /* Be careful, tcp_data() may have put this into TIME_WAIT. */ + if(sk->state != TCP_CLOSE) { + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + } + + if (!queued) { + discard: + kfree_skb(skb); + } + + return 0; +} + +/* + * Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented + * as an open_request. + */ + +struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + struct open_request *req) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + u32 flg; + + /* assumption: the socket is not in use. + * as we checked the user count on tcp_rcv and we're + * running from a soft interrupt. + */ + + /* Check for syn retransmission */ + flg = *(((u32 *)skb->h.th) + 3); + + flg &= __constant_htonl(0x00170000); + /* Only SYN set? */ + if (flg == __constant_htonl(0x00020000)) { + if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { + /* retransmited syn. + */ + req->class->rtx_syn_ack(sk, req); + return NULL; + } else { + return sk; /* Pass new SYN to the listen socket. */ + } + } + + /* We know it's an ACK here */ + if (req->sk) { + /* socket already created but not + * yet accepted()... + */ + sk = req->sk; + } else { + /* In theory the packet could be for a cookie, but + * TIME_WAIT should guard us against this. + * XXX: Nevertheless check for cookies? + * This sequence number check is done again later, + * but we do it here to prevent syn flood attackers + * from creating big SYN_RECV sockets. 
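+	 *
+	 * The check performed just below can be summarised by the standalone
+	 * sketch that follows; the helper names are made up and the sequence
+	 * numbers are taken in host byte order:
+	 *
+	 *   #include <stdint.h>
+	 *
+	 *   // True when lo <= x <= hi, modulo 2^32.
+	 *   static int req_between(uint32_t x, uint32_t lo, uint32_t hi)
+	 *   {
+	 *           return hi - lo >= x - lo;
+	 *   }
+	 *
+	 *   // The completing ACK must acknowledge our SYN-ACK (snt_isn or
+	 *   // snt_isn + 1) and carry a sequence number inside the window we
+	 *   // advertised after the peer's ISN.
+	 *   static int ack_matches_request(uint32_t ack_seq, uint32_t seq,
+	 *                                  uint32_t snt_isn, uint32_t rcv_isn,
+	 *                                  uint32_t rcv_wnd)
+	 *   {
+	 *           return req_between(ack_seq, snt_isn, snt_isn + 1) &&
+	 *                  req_between(seq, rcv_isn, rcv_isn + 1 + rcv_wnd);
+	 *   }
+	 *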
+ */ + if (!between(TCP_SKB_CB(skb)->ack_seq, req->snt_isn, req->snt_isn+1) || + !between(TCP_SKB_CB(skb)->seq, req->rcv_isn, + req->rcv_isn+1+req->rcv_wnd)) { + req->class->send_reset(skb); + return NULL; + } + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + if (sk == NULL) + return NULL; + + req->expires = 0UL; + req->sk = sk; + } + skb_orphan(skb); + skb_set_owner_r(skb, sk); + return sk; +} + +/* + * This function implements the receiving procedure of RFC 793 for + * all states except ESTABLISHED and TIME_WAIT. + * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be + * address independent. + */ + +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, unsigned len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int queued = 0; + + switch (sk->state) { + case TCP_CLOSE: + /* When state == CLOSED, hash lookup always fails. + * + * But, there is a back door, the backlog queue. + * If we have a sequence of packets in the backlog + * during __release_sock() which have a sequence such + * that: + * packet X causes entry to TCP_CLOSE state + * ... + * packet X + N has FIN bit set + * + * We report a (luckily) harmless error in this case. + * The issue is that backlog queue processing bypasses + * any hash lookups (we know which socket packets are for). + * The correct behavior here is what 2.0.x did, since + * a TCP_CLOSE socket does not exist. Drop the frame + * and send a RST back to the other end. + */ + return 1; + + case TCP_LISTEN: + /* These use the socket TOS.. + * might want to be the received TOS + */ + if(th->ack) + return 1; + + if(th->syn) { + if(tp->af_specific->conn_request(sk, skb, 0) < 0) + return 1; + + /* Now we have several options: In theory there is + * nothing else in the frame. KA9Q has an option to + * send data with the syn, BSD accepts data with the + * syn up to the [to be] advertised window and + * Solaris 2.1 gives you a protocol error. For now + * we just ignore it, that fits the spec precisely + * and avoids incompatibilities. It would be nice in + * future to drop through and process the data. + * + * Now that TTCP is starting to be used we ought to + * queue this data. + * But, this leaves one open to an easy denial of + * service attack, and SYN cookies can't defend + * against this problem. So, we drop the data + * in the interest of security over speed. + */ + goto discard; + } + + goto discard; + break; + + case TCP_SYN_SENT: + /* SYN sent means we have to look for a suitable ack and + * either reset for bad matches or go to connected. + * The SYN_SENT case is unusual and should + * not be in line code. [AC] + */ + if(th->ack) { + /* rfc793: + * "If the state is SYN-SENT then + * first check the ACK bit + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send + * a reset (unless the RST bit is set, if so drop + * the segment and return)" + * + * I cite this place to emphasize one essential + * detail, this check is different of one + * in established state: SND.UNA <= SEG.ACK <= SND.NXT. + * SEG_ACK == SND.UNA == ISS is invalid in SYN-SENT, + * because we have no previous data sent before SYN. + * --ANK(990513) + * + * We do not send data with SYN, so that RFC-correct + * test reduces to: + */ + if (sk->zapped || + TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) + return 1; + + /* Now ACK is acceptable. 
+ * + * "If the RST bit is set + * If the ACK was acceptable then signal the user "error: + * connection reset", drop the segment, enter CLOSED state, + * delete TCB, and return." + */ + + if (th->rst) { + tcp_reset(sk); + goto discard; + } + + /* rfc793: + * "fifth, if neither of the SYN or RST bits is set then + * drop the segment and return." + * + * See note below! + * --ANK(990513) + */ + + if (!th->syn) + goto discard; + + /* rfc793: + * "If the SYN bit is on ... + * are acceptable then ... + * (our SYN has been ACKed), change the connection + * state to ESTABLISHED..." + * + * Do you see? SYN-less ACKs in SYN-SENT state are + * completely ignored. + * + * The bug causing stalled SYN-SENT sockets + * was here: tcp_ack advanced snd_una and canceled + * retransmit timer, so that bare ACK received + * in SYN-SENT state (even with invalid ack==ISS, + * because tcp_ack check is too weak for SYN-SENT) + * causes moving socket to invalid semi-SYN-SENT, + * semi-ESTABLISHED state and connection hangs. + * + * There exist buggy stacks, which really send + * such ACKs: f.e. 202.226.91.94 (okigate.oki.co.jp) + * Actually, if this host did not try to get something + * from ftp.inr.ac.ru I'd never find this bug 8) + * + * --ANK (990514) + */ + + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tcp_ack(sk,th, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->ack_seq, len); + + /* Ok.. it's good. Set up sequence numbers and + * move to established. + */ + tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1; + tp->rcv_wup = TCP_SKB_CB(skb)->seq+1; + + /* RFC1323: The window in SYN & SYN/ACK segments is + * never scaled. + */ + tp->snd_wnd = htons(th->window); + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq; + tp->fin_seq = TCP_SKB_CB(skb)->seq; + + tcp_set_state(sk, TCP_ESTABLISHED); + tcp_parse_options(sk, th, tp, 0); + + if (tp->wscale_ok == 0) { + tp->snd_wscale = tp->rcv_wscale = 0; + tp->window_clamp = min(tp->window_clamp,65535); + } + + if (tp->tstamp_ok) { + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else + tp->tcp_header_len = sizeof(struct tcphdr); + if (tp->saw_tstamp) { + tp->ts_recent = tp->rcv_tsval; + tp->ts_recent_stamp = tcp_time_stamp; + } + + /* Can't be earlier, doff would be wrong. */ + tcp_send_ack(sk); + + sk->dport = th->source; + tp->copied_seq = tp->rcv_nxt; + + if(!sk->dead) { + sk->state_change(sk); + sock_wake_async(sk->socket, 0); + } + } else { + if(th->syn && !th->rst) { + /* The previous version of the code + * checked for "connecting to self" + * here. that check is done now in + * tcp_connect. + */ + tcp_set_state(sk, TCP_SYN_RECV); + tcp_parse_options(sk, th, tp, 0); + if (tp->saw_tstamp) { + tp->ts_recent = tp->rcv_tsval; + tp->ts_recent_stamp = tcp_time_stamp; + } + + tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + + /* RFC1323: The window in SYN & SYN/ACK segments is + * never scaled. + */ + tp->snd_wnd = htons(th->window); + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + + tcp_send_synack(sk); + } else + break; + } + + /* tp->tcp_header_len and tp->mss_clamp + probably changed, synchronize mss. + */ + tcp_sync_mss(sk, tp->pmtu_cookie); + tp->rcv_mss = tp->mss_cache; + + if (sk->state == TCP_SYN_RECV) + goto discard; + + goto step6; + } + + /* Parse the tcp_options present on this header. + * By this point we really only expect timestamps. + * Note that this really has to be here and not later for PAWS + * (RFC1323) to work. 
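+	 *
+	 * The core of that PAWS test can be sketched standalone as below;
+	 * this is a simplification (the real check also special-cases RSTs),
+	 * the helper name is made up and the 24-day limit is expressed in
+	 * seconds for the example:
+	 *
+	 *   #include <stdint.h>
+	 *
+	 *   #define PAWS_IDLE_LIMIT (60 * 60 * 24 * 24)  // 24 days, seconds
+	 *
+	 *   // Reject a segment whose timestamp value is older than the most
+	 *   // recently seen one, provided the remembered timestamp is itself
+	 *   // fresh enough for the comparison to be meaningful.
+	 *   static int paws_reject(uint32_t rcv_tsval, uint32_t ts_recent,
+	 *                          long now, long ts_recent_stamp)
+	 *   {
+	 *           return (int32_t)(ts_recent - rcv_tsval) > 0 &&
+	 *                  now - ts_recent_stamp <= PAWS_IDLE_LIMIT;
+	 *   }
+	 *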
+ */ + if (tcp_fast_parse_options(sk, th, tp)) { + /* NOTE: assumes saw_tstamp is never set if we didn't + * negotiate the option. tcp_fast_parse_options() must + * guarantee this. + */ + if (tp->saw_tstamp) { + if (tcp_paws_discard(tp, th, len)) { + tcp_statistics.TcpInErrs++; + if (!th->rst) { + tcp_send_ack(sk); + goto discard; + } + } + tcp_replace_ts_recent(sk, tp, + TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + } + } + + /* The silly FIN test here is necessary to see an advancing ACK in + * retransmitted FIN frames properly. Consider the following sequence: + * + * host1 --> host2 FIN XSEQ:XSEQ(0) ack YSEQ + * host2 --> host1 FIN YSEQ:YSEQ(0) ack XSEQ + * host1 --> host2 XSEQ:XSEQ(0) ack YSEQ+1 + * host2 --> host1 FIN YSEQ:YSEQ(0) ack XSEQ+1 (fails tcp_sequence test) + * + * At this point the connection will deadlock with host1 believing + * that his FIN is never ACK'd, and thus it will retransmit it's FIN + * forever. The following fix is from Taral (taral@taral.net). + */ + + /* step 1: check sequence number */ + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq) && + !(th->fin && TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)) { + if (!th->rst) { + tcp_send_ack(sk); + } + goto discard; + } + + /* step 2: check RST bit */ + if(th->rst) { + tcp_reset(sk); + goto discard; + } + + /* step 3: check security and precedence [ignored] */ + + /* step 4: + * + * Check for a SYN, and ensure it matches the SYN we were + * first sent. We have to handle the rather unusual (but valid) + * sequence that KA9Q derived products may generate of + * + * SYN + * SYN|ACK Data + * ACK (lost) + * SYN|ACK Data + More Data + * .. we must ACK not RST... + * + * We keep syn_seq as the sequence space occupied by the + * original syn. + */ + + if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) { + tcp_reset(sk); + return 1; + } + + /* step 5: check the ACK field */ + if (th->ack) { + int acceptable = tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->ack_seq, len); + + switch(sk->state) { + case TCP_SYN_RECV: + if (acceptable) { + tcp_set_state(sk, TCP_ESTABLISHED); + sk->dport = th->source; + tp->copied_seq = tp->rcv_nxt; + + if(!sk->dead) + sk->state_change(sk); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = htons(th->window) << tp->snd_wscale; + tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq; + + } else { + SOCK_DEBUG(sk, "bad ack\n"); + return 1; + } + break; + + case TCP_FIN_WAIT1: + if (tp->snd_una == tp->write_seq) { + sk->shutdown |= SEND_SHUTDOWN; + tcp_set_state(sk, TCP_FIN_WAIT2); + if (!sk->dead) + sk->state_change(sk); + else + tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout); + } + break; + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk); + goto discard; + } + break; + + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + sk->shutdown = SHUTDOWN_MASK; + tcp_set_state(sk,TCP_CLOSE); + if (!sk->dead) + sk->state_change(sk); + goto discard; + } + break; + } + } else + goto discard; + +step6: + /* step 6: check the URG bit */ + tcp_urg(sk, th, len); + + /* step 7: process the segment text */ + switch (sk->state) { + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + if (!before(TCP_SKB_CB(skb)->seq, tp->fin_seq)) + break; + + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + /* RFC 793 says to queue data in these states, + * RFC 1122 says we MUST send a reset. + * BSD 4.4 also does reset. 
+ */ + if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) { + if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + tcp_reset(sk); + return 1; + } + } + + case TCP_ESTABLISHED: + queued = tcp_data(skb, sk, len); + + /* This must be after tcp_data() does the skb_pull() to + * remove the header size from skb->len. + */ + tcp_measure_rcv_mss(sk, skb); + break; + } + + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + + if (!queued) { +discard: + kfree_skb(skb); + } + return 0; +} diff --git a/pfinet/linux-src/net/ipv4/tcp_ipv4.c b/pfinet/linux-src/net/ipv4/tcp_ipv4.c new file mode 100644 index 00000000..c2c78365 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/tcp_ipv4.c @@ -0,0 +1,2044 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_ipv4.c,v 1.175.2.10 1999/08/13 16:14:35 davem Exp $ + * + * IPv4 specific functions + * + * + * code split from: + * linux/ipv4/tcp.c + * linux/ipv4/tcp_input.c + * linux/ipv4/tcp_output.c + * + * See tcp.c for author information + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Changes: + * David S. Miller : New socket lookup architecture. + * This code is dedicated to John Dyson. + * David S. Miller : Change semantics of established hash, + * half is devoted to TIME_WAIT sockets + * and the rest go in the other half. + * Andi Kleen : Add support for syncookies and fixed + * some bugs: ip options weren't passed to + * the TCP layer, missed a check for an ACK bit. + * Andi Kleen : Implemented fast path mtu discovery. + * Fixed many serious bugs in the + * open_request handling and moved + * most of it into the af independent code. + * Added tail drop and some other bugfixes. + * Added new listen sematics. + * Mike McLagan : Routing by source + * Juan Jose Ciarlante: ip_dynaddr bits + * Andi Kleen: various fixes. + * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Andi Kleen : Fix new listen. + * Andi Kleen : Fix accept error reporting. + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/ipsec.h> + +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/ipv6.h> + +#include <asm/segment.h> + +#include <linux/inet.h> +#include <linux/stddef.h> + +extern int sysctl_tcp_timestamps; +extern int sysctl_tcp_window_scaling; +extern int sysctl_tcp_sack; +extern int sysctl_tcp_syncookies; +extern int sysctl_ip_dynaddr; +extern __u32 sysctl_wmem_max; +extern __u32 sysctl_rmem_max; + +/* Check TCP sequence numbers in ICMP packets. */ +#define ICMP_MIN_LENGTH 8 + +/* Socket used for sending RSTs */ +struct inode tcp_inode; +struct socket *tcp_socket=&tcp_inode.u.socket_i; + +static void tcp_v4_send_reset(struct sk_buff *skb); + +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb); + +/* This is for sockets with full identity only. 
Sockets here will always + * be without wildcards and will have the following invariant: + * TCP_ESTABLISHED <= sk->state < TCP_CLOSE + * + * First half of the table is for sockets not in TIME_WAIT, second half + * is for TIME_WAIT sockets only. + */ +struct sock *tcp_established_hash[TCP_HTABLE_SIZE]; + +/* Ok, let's try this, I give up, we do need a local binding + * TCP hash as well as the others for fast bind/connect. + */ +struct tcp_bind_bucket *tcp_bound_hash[TCP_BHTABLE_SIZE]; + +/* All sockets in TCP_LISTEN state will be in here. This is the only table + * where wildcard'd TCP sockets can exist. Hash function here is just local + * port number. + */ +struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE]; + +/* Register cache. */ +struct sock *tcp_regs[TCP_NUM_REGS]; + +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; +int tcp_port_rover = (1024 - 1); + +static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, + __u32 faddr, __u16 fport) +{ + return ((laddr ^ lport) ^ (faddr ^ fport)) & ((TCP_HTABLE_SIZE/2) - 1); +} + +static __inline__ int tcp_sk_hashfn(struct sock *sk) +{ + __u32 laddr = sk->rcv_saddr; + __u16 lport = sk->num; + __u32 faddr = sk->daddr; + __u16 fport = sk->dport; + + return tcp_hashfn(laddr, lport, faddr, fport); +} + +/* Allocate and initialize a new TCP local port bind bucket. + * Always runs inside the socket hashing lock. + */ +struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum) +{ + struct tcp_bind_bucket *tb; + + tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC); + if(tb != NULL) { + struct tcp_bind_bucket **head = + &tcp_bound_hash[tcp_bhashfn(snum)]; + tb->port = snum; + tb->fastreuse = 0; + tb->owners = NULL; + if((tb->next = *head) != NULL) + tb->next->pprev = &tb->next; + *head = tb; + tb->pprev = head; + } + return tb; +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* Ensure that the bound bucket for the port exists. + * Return 0 and bump bucket reference count on success. + * + * Must run in a BH atomic section. + */ +static __inline__ int __tcp_bucket_check(unsigned short snum) +{ + struct tcp_bind_bucket *tb; + + tb = tcp_bound_hash[tcp_bhashfn(snum)]; + for( ; (tb && (tb->port != snum)); tb = tb->next) + ; + if (tb == NULL) { + if ((tb = tcp_bucket_create(snum)) == NULL) + return 1; + } + + return 0; +} +#endif + +static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) +{ + struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev; + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (child->num != sk->num) { + unsigned short snum = ntohs(child->num); + for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; + tb && tb->port != snum; + tb = tb->next) + ; + if (tb == NULL) + tb = (struct tcp_bind_bucket *)sk->prev; + } +#endif + if ((child->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &child->bind_next; + tb->owners = child; + child->bind_pprev = &tb->owners; + child->prev = (struct sock *) tb; +} + +__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) +{ + SOCKHASH_LOCK(); + __tcp_inherit_port(sk, child); + SOCKHASH_UNLOCK(); +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. 
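+ *
+ * The rover walk below can be read as the following standalone sketch;
+ * the helper names and the port_in_use() predicate (standing in for the
+ * bind-hash walk) are made up for the example:
+ *
+ *   // Walk a rover through [low, high], wrapping around, and take the
+ *   // first port nobody is bound to. Returns 0 when the whole range
+ *   // has been exhausted.
+ *   static unsigned short pick_local_port(int *rover, int low, int high,
+ *                                         int (*port_in_use)(int port))
+ *   {
+ *           int remaining = high - low + 1;
+ *
+ *           do {
+ *                   (*rover)++;
+ *                   if (*rover < low || *rover > high)
+ *                           *rover = low;
+ *                   if (!port_in_use(*rover))
+ *                           return (unsigned short)*rover;
+ *           } while (--remaining > 0);
+ *           return 0;
+ *   }
+ *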
+ */ +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +{ + struct tcp_bind_bucket *tb; + + SOCKHASH_LOCK(); + if (snum == 0) { + int rover = tcp_port_rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + + do { rover++; + if ((rover < low) || (rover > high)) + rover = low; + tb = tcp_bound_hash[tcp_bhashfn(rover)]; + for ( ; tb; tb = tb->next) + if (tb->port == rover) + goto next; + break; + next: + } while (--remaining > 0); + tcp_port_rover = rover; + + /* Exhausted local port range during search? */ + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. */ + snum = rover; + tb = NULL; + } else { + for (tb = tcp_bound_hash[tcp_bhashfn(snum)]; + tb != NULL; + tb = tb->next) + if (tb->port == snum) + break; + } + if (tb != NULL && tb->owners != NULL) { + if (tb->fastreuse != 0 && sk->reuse != 0) { + goto success; + } else { + struct sock *sk2 = tb->owners; + int sk_reuse = sk->reuse; + + for( ; sk2 != NULL; sk2 = sk2->bind_next) { + if (sk->bound_dev_if == sk2->bound_dev_if) { + if (!sk_reuse || + !sk2->reuse || + sk2->state == TCP_LISTEN) { + if (!sk2->rcv_saddr || + !sk->rcv_saddr || + (sk2->rcv_saddr == sk->rcv_saddr)) + break; + } + } + } + /* If we found a conflict, fail. */ + if (sk2 != NULL) + goto fail; + } + } + if (tb == NULL && + (tb = tcp_bucket_create(snum)) == NULL) + goto fail; + if (tb->owners == NULL) { + if (sk->reuse && sk->state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + tb->fastreuse = 0; +success: + sk->num = snum; + if ((sk->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &sk->bind_next; + tb->owners = sk; + sk->bind_pprev = &tb->owners; + sk->prev = (struct sock *) tb; + + SOCKHASH_UNLOCK(); + return 0; + +fail: + SOCKHASH_UNLOCK(); + return 1; +} + +/* Get rid of any references to a local port held by the + * given sock. + */ +__inline__ void __tcp_put_port(struct sock *sk) +{ + struct tcp_bind_bucket *tb; + + tb = (struct tcp_bind_bucket *) sk->prev; + if (sk->bind_next) + sk->bind_next->bind_pprev = sk->bind_pprev; + *(sk->bind_pprev) = sk->bind_next; + sk->prev = NULL; + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); + } +} + +void tcp_put_port(struct sock *sk) +{ + SOCKHASH_LOCK(); + __tcp_put_port(sk); + SOCKHASH_UNLOCK(); +} + +static __inline__ void __tcp_v4_hash(struct sock *sk) +{ + struct sock **skp; + + if(sk->state == TCP_LISTEN) + skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + else + skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; + + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; +} + +static void tcp_v4_hash(struct sock *sk) +{ + if (sk->state != TCP_CLOSE) { + SOCKHASH_LOCK(); + __tcp_v4_hash(sk); + SOCKHASH_UNLOCK(); + } +} + +static void tcp_v4_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if(sk->pprev) { + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + tcp_reg_zap(sk); + __tcp_put_port(sk); + } + SOCKHASH_UNLOCK(); +} + +/* Don't inline this cruft. Here are some nice properties to + * exploit here. The BSD API does not allow a listening TCP + * to specify the remote port nor the remote address for the + * connection. So always assume those are both wildcarded + * during the search since they can never be otherwise. 
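+ *
+ * The scoring done below can be sketched standalone as follows; the
+ * struct and helper names are made up for the example:
+ *
+ *   #include <stdint.h>
+ *
+ *   struct listener { uint32_t bound_addr; int bound_ifindex; };
+ *
+ *   // One point for the (already matched) port, one more for a bound
+ *   // address that matches the destination, one more for a matching
+ *   // bound interface; a bound field that mismatches disqualifies the
+ *   // candidate. A score of 3 is an exact match and wins outright.
+ *   static int listener_score(const struct listener *l,
+ *                             uint32_t daddr, int dif)
+ *   {
+ *           int score = 1;
+ *
+ *           if (l->bound_addr) {
+ *                   if (l->bound_addr != daddr)
+ *                           return -1;
+ *                   score++;
+ *           }
+ *           if (l->bound_ifindex) {
+ *                   if (l->bound_ifindex != dif)
+ *                           return -1;
+ *                   score++;
+ *           }
+ *           return score;
+ *   }
+ *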
+ */ +static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) +{ + struct sock *sk; + struct sock *result = NULL; + int score, hiscore; + + hiscore=0; + for(sk = tcp_listening_hash[tcp_lhashfn(hnum)]; sk; sk = sk->next) { + if(sk->num == hnum) { + __u32 rcv_saddr = sk->rcv_saddr; + + score = 1; + if(rcv_saddr) { + if (rcv_saddr != daddr) + continue; + score++; + } + if (sk->bound_dev_if) { + if (sk->bound_dev_if != dif) + continue; + score++; + } + if (score == 3) + return sk; + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + return result; +} + +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * It is assumed that this code only gets called from within NET_BH. + */ +static inline struct sock *__tcp_v4_lookup(struct tcphdr *th, + u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) +{ + TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) + __u16 hnum = ntohs(dport); + __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + struct sock *sk; + int hash; + + /* Check TCP register quick cache first. */ + sk = TCP_RHASH(sport); + if(sk && TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + hash = tcp_hashfn(daddr, hnum, saddr, sport); + for(sk = tcp_established_hash[hash]; sk; sk = sk->next) { + if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) { + if (sk->state == TCP_ESTABLISHED) + TCP_RHASH(sport) = sk; + goto hit; /* You sunk my battleship! */ + } + } + /* Must check for a TIME_WAIT'er before going to listener hash. */ + for(sk = tcp_established_hash[hash+(TCP_HTABLE_SIZE/2)]; sk; sk = sk->next) + if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + sk = tcp_v4_lookup_listener(daddr, hnum, dif); +hit: + return sk; +} + +__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +{ + return __tcp_v4_lookup(0, saddr, sport, daddr, dport, dif); +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* Cleaned up a little and adapted to new bind bucket scheme. + * Oddly, this should increase performance here for + * transparent proxy, as tests within the inner loop have + * been eliminated. -DaveM + */ +static struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr, + unsigned short rnum, unsigned long laddr, + struct device *dev, unsigned short pnum, + int dif) +{ + struct sock *s, *result = NULL; + int badness = -1; + u32 paddr = 0; + unsigned short hnum = ntohs(num); + unsigned short hpnum = ntohs(pnum); + int firstpass = 1; + + if(dev && dev->ip_ptr) { + struct in_device *idev = dev->ip_ptr; + + if(idev->ifa_list) + paddr = idev->ifa_list->ifa_local; + } + + /* This code must run only from NET_BH. 
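+ *
+ * For reference, the non-proxy demultiplexing above (tcp_hashfn and
+ * __tcp_v4_lookup) reduces the connection 4-tuple to a bucket roughly
+ * as sketched below; the table size constant is illustrative and must
+ * be a power of two:
+ *
+ *   #include <stdint.h>
+ *
+ *   #define EHASH_SIZE 512  // illustrative
+ *
+ *   // Fold the 4-tuple into the first half of the established hash;
+ *   // the second half of the table is reserved for TIME_WAIT sockets.
+ *   static unsigned int ehashfn(uint32_t laddr, uint16_t lport,
+ *                               uint32_t faddr, uint16_t fport)
+ *   {
+ *           return ((laddr ^ lport) ^ (faddr ^ fport)) &
+ *                  ((EHASH_SIZE / 2) - 1);
+ *   }
+ *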
*/ + { + struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(hnum)]; + for( ; (tb && tb->port != hnum); tb = tb->next) + ; + if(tb == NULL) + goto next; + s = tb->owners; + } +pass2: + for(; s; s = s->bind_next) { + int score = 0; + if(s->rcv_saddr) { + if((s->num != hpnum || s->rcv_saddr != paddr) && + (s->num != hnum || s->rcv_saddr != laddr)) + continue; + score++; + } + if(s->daddr) { + if(s->daddr != raddr) + continue; + score++; + } + if(s->dport) { + if(s->dport != rnum) + continue; + score++; + } + if(s->bound_dev_if) { + if(s->bound_dev_if != dif) + continue; + score++; + } + if(score == 4 && s->num == hnum) { + result = s; + goto gotit; + } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) { + result = s; + badness = score; + } + } +next: + if(firstpass--) { + struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(hpnum)]; + for( ; (tb && tb->port != hpnum); tb = tb->next) + ; + if(tb) { + s = tb->owners; + goto pass2; + } + } +gotit: + return result; +} +#endif /* CONFIG_IP_TRANSPARENT_PROXY */ + +static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) +{ + return secure_tcp_sequence_number(sk->saddr, sk->daddr, + skb->h.th->dest, + skb->h.th->source); +} + +/* Check that a TCP address is unique, don't allow multiple + * connects to/from the same address. Actually we can optimize + * quite a bit, since the socket about to connect is still + * in TCP_CLOSE, a tcp_bind_bucket for the local port he will + * use will exist, with a NULL owners list. So check for that. + * The good_socknum and verify_bind scheme we use makes this + * work. + */ +static int tcp_v4_unique_address(struct sock *sk) +{ + struct tcp_bind_bucket *tb; + unsigned short snum = sk->num; + int retval = 1; + + /* Freeze the hash while we snoop around. */ + SOCKHASH_LOCK(); + tb = tcp_bound_hash[tcp_bhashfn(snum)]; + for(; tb; tb = tb->next) { + if(tb->port == snum && tb->owners != NULL) { + /* Almost certainly the re-use port case, search the real hashes + * so it actually scales. + */ + sk = __tcp_v4_lookup(NULL, sk->daddr, sk->dport, + sk->rcv_saddr, htons(snum), + sk->bound_dev_if); + if((sk != NULL) && (sk->state != TCP_LISTEN)) + retval = 0; + break; + } + } + SOCKHASH_UNLOCK(); + return retval; +} + +/* This will initiate an outgoing connection. */ +int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + struct sk_buff *buff; + struct rtable *rt; + u32 daddr, nexthop; + int tmp; + + if (sk->state != TCP_CLOSE) + return(-EISCONN); + + /* Don't allow a double connect. 
*/ + if (sk->daddr) + return -EINVAL; + + if (addr_len < sizeof(struct sockaddr_in)) + return(-EINVAL); + + if (usin->sin_family != AF_INET) { + static int complained; + if (usin->sin_family) + return(-EAFNOSUPPORT); + if (!complained++) + printk(KERN_DEBUG "%s forgot to set AF_INET in " __FUNCTION__ "\n", current->comm); + } + + nexthop = daddr = usin->sin_addr.s_addr; + if (sk->opt && sk->opt->srr) { + if (daddr == 0) + return -EINVAL; + nexthop = sk->opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, sk->saddr, + RT_TOS(sk->ip_tos)|RTO_CONN|sk->localroute, sk->bound_dev_if); + if (tmp < 0) + return tmp; + + if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + dst_release(xchg(&sk->dst_cache, rt)); + + buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header), + 0, GFP_KERNEL); + + if (buff == NULL) + return -ENOBUFS; + + /* Socket has no identity, so lock_sock() is useless. Also + * since state==TCP_CLOSE (checked above) the socket cannot + * possibly be in the hashes. TCP hash locking is only + * needed while checking quickly for a unique address. + * However, the socket does need to be (and is) locked + * in tcp_connect(). + * Perhaps this addresses all of ANK's concerns. 8-) -DaveM + */ + sk->dport = usin->sin_port; + sk->daddr = rt->rt_dst; + if (sk->opt && sk->opt->srr) + sk->daddr = daddr; + if (!sk->saddr) + sk->saddr = rt->rt_src; + sk->rcv_saddr = sk->saddr; + + if (!tcp_v4_unique_address(sk)) { + kfree_skb(buff); + sk->daddr = 0; + return -EADDRNOTAVAIL; + } + + tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, + sk->sport, usin->sin_port); + + tp->ext_header_len = 0; + if (sk->opt) + tp->ext_header_len = sk->opt->optlen; + + /* Reset mss clamp */ + tp->mss_clamp = ~0; + + if (!ip_dont_fragment(sk, &rt->u.dst) && + rt->u.dst.pmtu > 576 && rt->rt_dst != rt->rt_gateway) { + /* Clamp mss at maximum of 536 and user_mss. + Probably, user ordered to override tiny segment size + in gatewayed case. + */ + tp->mss_clamp = max(tp->user_mss, 536); + } + + tcp_connect(sk, buff, rt->u.dst.pmtu); + return 0; +} + +static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg, int len) +{ + int retval = -EINVAL; + + /* Do sanity checking for sendmsg/sendto/send. */ + if (msg->msg_flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL)) + goto out; + if (msg->msg_name) { + struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name; + + if (msg->msg_namelen < sizeof(*addr)) + goto out; + if (addr->sin_family && addr->sin_family != AF_INET) + goto out; + retval = -ENOTCONN; + if(sk->state == TCP_CLOSE) + goto out; + retval = -EISCONN; + if (addr->sin_port != sk->dport) + goto out; + if (addr->sin_addr.s_addr != sk->daddr) + goto out; + } + retval = tcp_do_sendmsg(sk, msg); + +out: + return retval; +} + + +/* + * Do a linear search in the socket open_request list. + * This should be replaced with a global hash table. + */ +static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, + struct iphdr *iph, + struct tcphdr *th, + struct open_request **prevp) +{ + struct open_request *req, *prev; + __u16 rport = th->source; + + /* assumption: the socket is not in use. + * as we checked the user count on tcp_rcv and we're + * running from a soft interrupt. 
+ */ + prev = (struct open_request *) (&tp->syn_wait_queue); + for (req = prev->dl_next; req; req = req->dl_next) { + if (req->af.v4_req.rmt_addr == iph->saddr && + req->af.v4_req.loc_addr == iph->daddr && + req->rmt_port == rport +#ifdef CONFIG_IP_TRANSPARENT_PROXY + && req->lcl_port == th->dest +#endif + ) { + *prevp = prev; + return req; + } + prev = req; + } + return NULL; +} + + +/* + * This routine does path mtu discovery as defined in RFC1191. + */ +static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (atomic_read(&sk->sock_readers)) + return; + + /* Don't interested in TCP_LISTEN and open_requests (SYN-ACKs + * send out by Linux are always <576bytes so they should go through + * unfragmented). + */ + if (sk->state == TCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if (sk->dst_cache == NULL) + return; + ip_rt_update_pmtu(sk->dst_cache, mtu); + if (sk->ip_pmtudisc != IP_PMTUDISC_DONT && + tp->pmtu_cookie > sk->dst_cache->pmtu) { + tcp_sync_mss(sk, sk->dst_cache->pmtu); + + /* Resend the TCP packet because it's + * clear that the old packet has been + * dropped. This is the new "fast" path mtu + * discovery. + */ + tcp_simple_retransmit(sk); + } /* else let the usual retransmit timer handle it */ +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. After adjustment + * header points to the first 8 bytes of the tcp header. We need + * to find the appropriate port. + * + * The locking strategy used here is very "optimistic". When + * someone else accesses the socket the ICMP is just dropped + * and for some paths there is no check at all. + * A more general error queue to queue errors for later handling + * is probably better. + * + * sk->err and sk->err_soft should be atomic_t. 
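+ *
+ * One concrete piece of that handling, the fast-path MTU reaction in
+ * do_pmtu_discovery() above, can be sketched standalone as below; the
+ * callback parameters stand in for the real route and socket operations
+ * and are made up for the example:
+ *
+ *   // When an ICMP "fragmentation needed" reports an MTU smaller than
+ *   // the one in use, shrink the MSS and retransmit right away rather
+ *   // than waiting for the retransmit timer.
+ *   static void react_to_frag_needed(unsigned int reported_mtu,
+ *                                    unsigned int *cached_pmtu,
+ *                                    void (*sync_mss)(unsigned int mtu),
+ *                                    void (*retransmit_now)(void))
+ *   {
+ *           if (reported_mtu < *cached_pmtu) {
+ *                   *cached_pmtu = reported_mtu;
+ *                   sync_mss(reported_mtu);
+ *                   retransmit_now();  // dropped segment will not be acked
+ *           }
+ *           // else: let the usual retransmit timer handle it
+ *   }
+ *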
+ */ + +void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len) +{ + struct iphdr *iph = (struct iphdr*)dp; + struct tcphdr *th; + struct tcp_opt *tp; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; +#if ICMP_MIN_LENGTH < 14 + int no_flags = 0; +#else +#define no_flags 0 +#endif + struct sock *sk; + __u32 seq; + int err; + + if (len < (iph->ihl << 2) + ICMP_MIN_LENGTH) { + icmp_statistics.IcmpInErrors++; + return; + } +#if ICMP_MIN_LENGTH < 14 + if (len < (iph->ihl << 2) + 14) + no_flags = 1; +#endif + + th = (struct tcphdr*)(dp+(iph->ihl<<2)); + + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex); + if (sk == NULL || sk->state == TCP_TIME_WAIT) { + icmp_statistics.IcmpInErrors++; + return; + } + + tp = &sk->tp_pinfo.af_tcp; + seq = ntohl(th->seq); + if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + net_statistics.OutOfWindowIcmps++; + return; + } + + switch (type) { + case ICMP_SOURCE_QUENCH: +#ifndef OLD_SOURCE_QUENCH /* This is deprecated */ + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; +#endif + return; + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + return; + + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + do_pmtu_discovery(sk, iph, ntohs(skb->h.icmph->un.frag.mtu)); + return; + } + + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + return; + } + + switch (sk->state) { + struct open_request *req, *prev; + case TCP_LISTEN: + /* Prevent race conditions with accept() - + * ICMP is unreliable. + */ + if (atomic_read(&sk->sock_readers)) { + net_statistics.LockDroppedIcmps++; + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently. + */ + return; + } + + /* The final ACK of the handshake should be already + * handled in the new socket context, not here. + * Strictly speaking - an ICMP error for the final + * ACK should set the opening flag, but that is too + * complicated right now. + */ + if (!no_flags && !th->syn && !th->ack) + return; + + req = tcp_v4_search_req(tp, iph, th, &prev); + if (!req) + return; + if (seq != req->snt_isn) { + net_statistics.OutOfWindowIcmps++; + return; + } + if (req->sk) { + /* + * Already in ESTABLISHED and a big socket is created, + * set error code there. + * The error will _not_ be reported in the accept(), + * but only with the next operation on the socket after + * accept. + */ + sk = req->sk; + } else { + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + tp->syn_backlog--; + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); + return; + } + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: /* Cannot happen */ + if (!no_flags && !th->syn) + return; + tcp_statistics.TcpAttemptFails++; + sk->err = err; + sk->zapped = 1; + mb(); + sk->error_report(sk); + return; + } + + /* If we've already connected we will keep trying + * until we time out, or the user gives up. + * + * rfc1122 4.2.3.9 allows to consider as hard errors + * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, + * but it is obsoleted by pmtu discovery). 
+ * + * Note, that in modern internet, where routing is unreliable + * and in each dark corner broken firewalls sit, sending random + * errors ordered by their masters even this two messages finally lose + * their original sense (even Linux sends invalid PORT_UNREACHs) + * + * Now we are in compliance with RFCs. + * --ANK (980905) + */ + + if (sk->ip_recverr) { + /* This code isn't serialized with the socket code */ + /* ANK (980927) ... which is harmless now, + sk->err's may be safely lost. + */ + sk->err = err; + mb(); + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + } else { /* Only an error on timeout */ + sk->err_soft = err; + mb(); + } +} + +/* This routine computes an IPv4 TCP checksum. */ +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb) +{ + th->check = 0; + th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr, + csum_partial((char *)th, th->doff<<2, skb->csum)); +} + +/* + * This routine will send an RST to the other tcp. + * + * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) + * for reset. + * Answer: if a packet caused RST, it is not for a socket + * existing in our system, if it is matched to a socket, + * it is just duplicate segment or bug in other side's TCP. + * So that we build reply only basing on parameters + * arrived with segment. + * Exception: precedence violation. We do not implement it in any case. + */ + +static void tcp_v4_send_reset(struct sk_buff *skb) +{ + struct tcphdr *th = skb->h.th; + struct tcphdr rth; + struct ip_reply_arg arg; + + /* Never send a reset in response to a reset. */ + if (th->rst) + return; + + if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL) { +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (((struct rtable*)skb->dst)->rt_type == RTN_UNICAST) + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_PORT_UNREACH, 0); +#endif + return; + } + + /* Swap the send and the receive. */ + memset(&rth, 0, sizeof(struct tcphdr)); + rth.dest = th->source; + rth.source = th->dest; + rth.doff = sizeof(struct tcphdr)/4; + rth.rst = 1; + + if (th->ack) { + rth.seq = th->ack_seq; + } else { + rth.ack = 1; + rth.ack_seq = th->syn ? htonl(ntohl(th->seq)+1) : th->seq; + } + + memset(&arg, 0, sizeof arg); + arg.iov[0].iov_base = (unsigned char *)&rth; + arg.iov[0].iov_len = sizeof rth; + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + skb->nh.iph->saddr, /*XXX*/ + sizeof(struct tcphdr), + IPPROTO_TCP, + 0); + arg.n_iov = 1; + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + + ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); + + tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutRsts++; +} + +/* + * Send an ACK for a socket less packet (needed for time wait) + * + * FIXME: Does not echo timestamps yet. + * + * Assumes that the caller did basic address and flag checks. + */ +static void tcp_v4_send_ack(struct sk_buff *skb, __u32 seq, __u32 ack, __u16 window) +{ + struct tcphdr *th = skb->h.th; + struct tcphdr rth; + struct ip_reply_arg arg; + + /* Swap the send and the receive. 
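+	 *
+	 * For the reset path just above (tcp_v4_send_reset), the RFC 793
+	 * numbering rule it applies can be sketched standalone as follows;
+	 * struct and helper names are made up and sequence numbers are in
+	 * host byte order:
+	 *
+	 *   #include <stdint.h>
+	 *
+	 *   struct rst_numbers { uint32_t seq, ack_seq; int ack; };
+	 *
+	 *   // If the offending segment carried an ACK, the reset reuses
+	 *   // that acknowledgment number as its own sequence number and
+	 *   // carries no ACK; otherwise it acknowledges the segment's
+	 *   // sequence number (plus one for a SYN).
+	 *   static struct rst_numbers rst_for(int had_ack, uint32_t seg_ack,
+	 *                                     uint32_t seg_seq, int had_syn)
+	 *   {
+	 *           struct rst_numbers r = { 0, 0, 0 };
+	 *
+	 *           if (had_ack) {
+	 *                   r.seq = seg_ack;
+	 *           } else {
+	 *                   r.ack = 1;
+	 *                   r.ack_seq = seg_seq + (had_syn ? 1 : 0);
+	 *           }
+	 *           return r;
+	 *   }
+	 *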
*/ + memset(&rth, 0, sizeof(struct tcphdr)); + rth.dest = th->source; + rth.source = th->dest; + rth.doff = sizeof(struct tcphdr)/4; + + rth.seq = seq; + rth.ack_seq = ack; + rth.ack = 1; + + rth.window = htons(window); + + memset(&arg, 0, sizeof arg); + arg.iov[0].iov_base = (unsigned char *)&rth; + arg.iov[0].iov_len = sizeof rth; + arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, + skb->nh.iph->saddr, /*XXX*/ + sizeof(struct tcphdr), + IPPROTO_TCP, + 0); + arg.n_iov = 1; + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + + ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); + + tcp_statistics.TcpOutSegs++; +} + + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + +/* + Seems, I never wrote nothing more stupid. + I hope Gods will forgive me, but I cannot forgive myself 8) + --ANK (981001) + */ + +static struct sock *tcp_v4_search_proxy_openreq(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4); + struct sock *sk; + int i; + + for (i=0; i<TCP_LHTABLE_SIZE; i++) { + for(sk = tcp_listening_hash[i]; sk; sk = sk->next) { + struct open_request *dummy; + if (tcp_v4_search_req(&sk->tp_pinfo.af_tcp, iph, + th, &dummy) && + (!sk->bound_dev_if || + sk->bound_dev_if == skb->dev->ifindex)) + return sk; + } + } + return NULL; +} + +/* + * Check whether a received TCP packet might be for one of our + * connections. + */ + +int tcp_chkaddr(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4); + struct sock *sk; + + sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, + th->dest, skb->dev->ifindex); + + if (!sk) + return tcp_v4_search_proxy_openreq(skb) != NULL; + + if (sk->state == TCP_LISTEN) { + struct open_request *dummy; + if (tcp_v4_search_req(&sk->tp_pinfo.af_tcp, skb->nh.iph, + th, &dummy) && + (!sk->bound_dev_if || + sk->bound_dev_if == skb->dev->ifindex)) + return 1; + } + + /* 0 means accept all LOCAL addresses here, not all the world... */ + + if (sk->rcv_saddr == 0) + return 0; + + return 1; +} +#endif + +/* + * Send a SYN-ACK after having received an ACK. + * This still operates on a open_request only, not on a big + * socket. + */ +static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) +{ + struct rtable *rt; + struct ip_options *opt; + struct sk_buff * skb; + int mss; + + /* First, grab a route. */ + opt = req->af.v4_req.opt; + if(ip_route_output(&rt, ((opt && opt->srr) ? + opt->faddr : + req->af.v4_req.rmt_addr), + req->af.v4_req.loc_addr, + RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute, + sk->bound_dev_if)) { + ip_statistics.IpOutNoRoutes++; + return; + } + if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { + ip_rt_put(rt); + ip_statistics.IpOutNoRoutes++; + return; + } + + mss = rt->u.dst.pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr); + + skb = tcp_make_synack(sk, &rt->u.dst, req, mss); + if (skb) { + struct tcphdr *th = skb->h.th; + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + th->source = req->lcl_port; /* LVE */ +#endif + + th->check = tcp_v4_check(th, skb->len, + req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr, + csum_partial((char *)th, skb->len, skb->csum)); + + ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, + req->af.v4_req.rmt_addr, req->af.v4_req.opt); + } + ip_rt_put(rt); +} + +/* + * IPv4 open_request destructor. 
+ */ +static void tcp_v4_or_free(struct open_request *req) +{ + if(!req->sk && req->af.v4_req.opt) + kfree_s(req->af.v4_req.opt, optlength(req->af.v4_req.opt)); +} + +static inline void syn_flood_warning(struct sk_buff *skb) +{ + static unsigned long warntime; + + if (jiffies - warntime > HZ*60) { + warntime = jiffies; + printk(KERN_INFO + "possible SYN flooding on port %d. Sending cookies.\n", + ntohs(skb->h.th->dest)); + } +} + +/* + * Save and compile IPv4 options into the open_request if needed. + */ +static inline struct ip_options * +tcp_v4_save_options(struct sock *sk, struct sk_buff *skb) +{ + struct ip_options *opt = &(IPCB(skb)->opt); + struct ip_options *dopt = NULL; + + if (opt && opt->optlen) { + int opt_size = optlength(opt); + dopt = kmalloc(opt_size, GFP_ATOMIC); + if (dopt) { + if (ip_options_echo(dopt, skb)) { + kfree_s(dopt, opt_size); + dopt = NULL; + } + } + } + return dopt; +} + +/* + * Maximum number of SYN_RECV sockets in queue per LISTEN socket. + * One SYN_RECV socket costs about 80bytes on a 32bit machine. + * It would be better to replace it with a global counter for all sockets + * but then some measure against one socket starving all other sockets + * would be needed. + */ +int sysctl_max_syn_backlog = 128; + +struct or_calltable or_ipv4 = { + tcp_v4_send_synack, + tcp_v4_or_free, + tcp_v4_send_reset +}; + +#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */ +#define BACKLOGMAX(sk) sysctl_max_syn_backlog + +int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn) +{ + struct tcp_opt tp; + struct open_request *req; + struct tcphdr *th = skb->h.th; + __u32 saddr = skb->nh.iph->saddr; + __u32 daddr = skb->nh.iph->daddr; +#ifdef CONFIG_SYN_COOKIES + int want_cookie = 0; +#else +#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ +#endif + + /* If the socket is dead, don't accept the connection. */ + if (sk->dead) + goto dead; + + /* Never answer to SYNs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST|RTCF_MULTICAST)) + goto drop; + + /* XXX: Check against a global syn pool counter. */ + if (BACKLOG(sk) > BACKLOGMAX(sk)) { +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + syn_flood_warning(skb); + want_cookie = 1; + } else +#endif + goto drop; + } else { + if (isn == 0) + isn = tcp_v4_init_sequence(sk, skb); + BACKLOG(sk)++; + } + + req = tcp_openreq_alloc(); + if (req == NULL) { + goto dropbacklog; + } + + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + + req->rcv_isn = TCP_SKB_CB(skb)->seq; + tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + + tp.mss_clamp = 65535; + tcp_parse_options(NULL, th, &tp, want_cookie); + if (tp.mss_clamp == 65535) + tp.mss_clamp = 576 - sizeof(struct iphdr) - sizeof(struct iphdr); + + if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp) + tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss; + req->mss = tp.mss_clamp; + + if (tp.saw_tstamp) + req->ts_recent = tp.rcv_tsval; + req->tstamp_ok = tp.tstamp_ok; + req->sack_ok = tp.sack_ok; + req->snd_wscale = tp.snd_wscale; + req->wscale_ok = tp.wscale_ok; + req->rmt_port = th->source; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + req->lcl_port = th->dest ; /* LVE */ +#endif + req->af.v4_req.loc_addr = daddr; + req->af.v4_req.rmt_addr = saddr; + + /* Note that we ignore the isn passed from the TIME_WAIT + * state here. That's the price we pay for cookies. 
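The admission checks at the top of tcp_v4_conn_request() reduce to a small decision: a dead listener answers with a reset, SYNs to broadcast or multicast destinations are silently dropped, and a full per-socket SYN backlog either drops the SYN or falls back to SYN cookies when that option is compiled in. A hedged restatement as a standalone decision function; the enum and parameter names are invented for the example:

    enum syn_verdict { SYN_RESET, SYN_DROP, SYN_QUEUE, SYN_COOKIE };

    /* Condensed form of the checks in tcp_v4_conn_request().
     * backlog/max_backlog correspond to BACKLOG(sk)/BACKLOGMAX(sk). */
    static enum syn_verdict classify_syn(int listener_dead,
                                         int dst_is_bcast_or_mcast,
                                         int backlog, int max_backlog,
                                         int syncookies_on)
    {
        if (listener_dead)
            return SYN_RESET;             /* caller sends a reset */
        if (dst_is_bcast_or_mcast)
            return SYN_DROP;              /* never answer such SYNs */
        if (backlog > max_backlog)
            return syncookies_on ? SYN_COOKIE : SYN_DROP;
        return SYN_QUEUE;                 /* allocate an open_request and reply */
    }
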
+ */ + if (want_cookie) + isn = cookie_v4_init_sequence(sk, skb, &req->mss); + + req->snt_isn = isn; + + req->af.v4_req.opt = tcp_v4_save_options(sk, skb); + + req->class = &or_ipv4; + req->retrans = 0; + req->sk = NULL; + + tcp_v4_send_synack(sk, req); + + if (want_cookie) { + if (req->af.v4_req.opt) + kfree(req->af.v4_req.opt); + tcp_v4_or_free(req); + tcp_openreq_free(req); + } else { + req->expires = jiffies + TCP_TIMEOUT_INIT; + tcp_inc_slow_timer(TCP_SLT_SYNACK); + tcp_synq_queue(&sk->tp_pinfo.af_tcp, req); + } + + return 0; + +dead: + SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk); + tcp_statistics.TcpAttemptFails++; + return -ENOTCONN; /* send reset */ + +dropbacklog: + if (!want_cookie) + BACKLOG(sk)--; +drop: + tcp_statistics.TcpAttemptFails++; + return 0; +} + +/* This is not only more efficient than what we used to do, it eliminates + * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM + * + * This function wants to be moved to a common for IPv[46] file. --ANK + */ +struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb) +{ + struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0); + + if(newsk != NULL) { + struct tcp_opt *newtp; +#ifdef CONFIG_FILTER + struct sk_filter *filter; +#endif + + memcpy(newsk, sk, sizeof(*newsk)); + newsk->sklist_next = NULL; + newsk->state = TCP_SYN_RECV; + + /* Clone the TCP header template */ + newsk->dport = req->rmt_port; + + atomic_set(&newsk->sock_readers, 0); + atomic_set(&newsk->rmem_alloc, 0); + skb_queue_head_init(&newsk->receive_queue); + atomic_set(&newsk->wmem_alloc, 0); + skb_queue_head_init(&newsk->write_queue); + atomic_set(&newsk->omem_alloc, 0); + + newsk->done = 0; + newsk->proc = 0; + skb_queue_head_init(&newsk->back_log); + skb_queue_head_init(&newsk->error_queue); +#ifdef CONFIG_FILTER + if ((filter = newsk->filter) != NULL) + sk_filter_charge(newsk, filter); +#endif + + /* Now setup tcp_opt */ + newtp = &(newsk->tp_pinfo.af_tcp); + newtp->pred_flags = 0; + newtp->rcv_nxt = req->rcv_isn + 1; + newtp->snd_nxt = req->snt_isn + 1; + newtp->snd_una = req->snt_isn + 1; + newtp->srtt = 0; + newtp->ato = 0; + newtp->snd_wl1 = req->rcv_isn; + newtp->snd_wl2 = req->snt_isn; + + /* RFC1323: The window in SYN & SYN/ACK segments + * is never scaled. + */ + newtp->snd_wnd = ntohs(skb->h.th->window); + + newtp->max_window = newtp->snd_wnd; + newtp->pending = 0; + newtp->retransmits = 0; + newtp->last_ack_sent = req->rcv_isn + 1; + newtp->backoff = 0; + newtp->mdev = TCP_TIMEOUT_INIT; + + /* So many TCP implementations out there (incorrectly) count the + * initial SYN frame in their delayed-ACK and congestion control + * algorithms that we must have the following bandaid to talk + * efficiently to them. 
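As the RFC 1323 comment in tcp_create_openreq_child() notes, window scaling never applies to the window field of a SYN or SYN-ACK; only segments after the handshake have their 16-bit window shifted by the agreed scale factor. A short illustrative helper showing how a received window field would be interpreted in each case (user-space C, names are not from this source):

    #include <stdint.h>
    #include <arpa/inet.h>

    /* Interpret the 16-bit window field of a received segment.
     * in_syn: segment has SYN set (window never scaled, RFC 1323).
     * snd_wscale: scale factor agreed during the handshake. */
    static uint32_t effective_window(uint16_t raw_window_net, int in_syn,
                                     int snd_wscale)
    {
        uint32_t w = ntohs(raw_window_net);
        return in_syn ? w : (w << snd_wscale);
    }
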
-DaveM + */ + newtp->snd_cwnd = 2; + + newtp->rto = TCP_TIMEOUT_INIT; + newtp->packets_out = 0; + newtp->fackets_out = 0; + newtp->retrans_out = 0; + newtp->high_seq = 0; + newtp->snd_ssthresh = 0x7fffffff; + newtp->snd_cwnd_cnt = 0; + newtp->dup_acks = 0; + newtp->delayed_acks = 0; + init_timer(&newtp->retransmit_timer); + newtp->retransmit_timer.function = &tcp_retransmit_timer; + newtp->retransmit_timer.data = (unsigned long) newsk; + init_timer(&newtp->delack_timer); + newtp->delack_timer.function = &tcp_delack_timer; + newtp->delack_timer.data = (unsigned long) newsk; + skb_queue_head_init(&newtp->out_of_order_queue); + newtp->send_head = newtp->retrans_head = NULL; + newtp->rcv_wup = req->rcv_isn + 1; + newtp->write_seq = req->snt_isn + 1; + newtp->copied_seq = req->rcv_isn + 1; + + newtp->saw_tstamp = 0; + newtp->mss_clamp = req->mss; + + init_timer(&newtp->probe_timer); + newtp->probe_timer.function = &tcp_probe_timer; + newtp->probe_timer.data = (unsigned long) newsk; + newtp->probes_out = 0; + newtp->syn_seq = req->rcv_isn; + newtp->fin_seq = req->rcv_isn; + newtp->urg_data = 0; + tcp_synq_init(newtp); + newtp->syn_backlog = 0; + if (skb->len >= 536) + newtp->last_seg_size = skb->len; + + /* Back to base struct sock members. */ + newsk->err = 0; + newsk->ack_backlog = 0; + newsk->max_ack_backlog = SOMAXCONN; + newsk->priority = 0; + + /* IP layer stuff */ + newsk->timeout = 0; + init_timer(&newsk->timer); + newsk->timer.function = &net_timer; + newsk->timer.data = (unsigned long) newsk; + newsk->socket = NULL; + + newtp->tstamp_ok = req->tstamp_ok; + if((newtp->sack_ok = req->sack_ok) != 0) + newtp->num_sacks = 0; + newtp->window_clamp = req->window_clamp; + newtp->rcv_wnd = req->rcv_wnd; + newtp->wscale_ok = req->wscale_ok; + if (newtp->wscale_ok) { + newtp->snd_wscale = req->snd_wscale; + newtp->rcv_wscale = req->rcv_wscale; + } else { + newtp->snd_wscale = newtp->rcv_wscale = 0; + newtp->window_clamp = min(newtp->window_clamp,65535); + } + if (newtp->tstamp_ok) { + newtp->ts_recent = req->ts_recent; + newtp->ts_recent_stamp = tcp_time_stamp; + newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { + newtp->tcp_header_len = sizeof(struct tcphdr); + } + } + return newsk; +} + +/* + * The three way handshake has completed - we got a valid synack - + * now create the new socket. + */ +struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct open_request *req, + struct dst_entry *dst) +{ + struct ip_options *opt = req->af.v4_req.opt; + struct tcp_opt *newtp; + struct sock *newsk; + + if (sk->ack_backlog > sk->max_ack_backlog) + goto exit; /* head drop */ + if (dst == NULL) { + struct rtable *rt; + + if (ip_route_output(&rt, + opt && opt->srr ? opt->faddr : req->af.v4_req.rmt_addr, + req->af.v4_req.loc_addr, sk->ip_tos|RTO_CONN, 0)) + return NULL; + dst = &rt->u.dst; + } +#ifdef CONFIG_IP_TRANSPARENT_PROXY + /* The new socket created for transparent proxy may fall + * into a non-existed bind bucket because sk->num != newsk->num. + * Ensure existance of the bucket now. The placement of the check + * later will require to destroy just created newsk in the case of fail. + * 1998/04/22 Andrey V. 
Savochkin <saw@msu.ru> + */ + if (__tcp_bucket_check(ntohs(skb->h.th->dest))) + goto exit; +#endif + + newsk = tcp_create_openreq_child(sk, req, skb); + if (!newsk) + goto exit; + + sk->tp_pinfo.af_tcp.syn_backlog--; + sk->ack_backlog++; + + newsk->dst_cache = dst; + + newtp = &(newsk->tp_pinfo.af_tcp); + newsk->daddr = req->af.v4_req.rmt_addr; + newsk->saddr = req->af.v4_req.loc_addr; + newsk->rcv_saddr = req->af.v4_req.loc_addr; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + newsk->num = ntohs(skb->h.th->dest); + newsk->sport = req->lcl_port; +#endif + newsk->opt = req->af.v4_req.opt; + newtp->ext_header_len = 0; + if (newsk->opt) + newtp->ext_header_len = newsk->opt->optlen; + + tcp_sync_mss(newsk, dst->pmtu); + newtp->rcv_mss = newtp->mss_clamp; + + /* It would be better to use newtp->mss_clamp here */ + if (newsk->rcvbuf < (3 * newtp->pmtu_cookie)) + newsk->rcvbuf = min ((3 * newtp->pmtu_cookie), sysctl_rmem_max); + if (newsk->sndbuf < (3 * newtp->pmtu_cookie)) + newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max); + + /* We run in BH processing itself or within a BH atomic + * sequence (backlog) so no locking is needed. + */ + __tcp_v4_hash(newsk); + __tcp_inherit_port(sk, newsk); + __add_to_prot_sklist(newsk); + + sk->data_ready(sk, 0); /* Deliver SIGIO */ + + return newsk; + +exit: + dst_release(dst); + return NULL; +} + +static void tcp_v4_rst_req(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req, *prev; + + req = tcp_v4_search_req(tp,skb->nh.iph, skb->h.th, &prev); + if (!req) + return; + /* Sequence number check required by RFC793 */ + if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) || + after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) + return; + tcp_synq_unlink(tp, req, prev); + (req->sk ? sk->ack_backlog : tp->syn_backlog)--; + req->class->destructor(req); + tcp_openreq_free(req); + + net_statistics.EmbryonicRsts++; +} + +/* Check for embryonic sockets (open_requests) We check packets with + * only the SYN bit set against the open_request queue too: This + * increases connection latency a bit, but is required to detect + * retransmitted SYNs. + */ +static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) +{ + struct tcphdr *th = skb->h.th; + u32 flg = ((u32 *)th)[3]; + + /* Check for RST */ + if (flg & __constant_htonl(0x00040000)) { + tcp_v4_rst_req(sk, skb); + return NULL; + } + + /* Check for SYN|ACK */ + if (flg & __constant_htonl(0x00120000)) { + struct open_request *req, *dummy; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Find possible connection requests. */ + req = tcp_v4_search_req(tp, skb->nh.iph, th, &dummy); + if (req) { + sk = tcp_check_req(sk, skb, req); + } +#ifdef CONFIG_SYN_COOKIES + else if (flg == __constant_htonl(0x00120000)) { + sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); + } +#endif + } + return sk; +} + +int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +{ +#ifdef CONFIG_FILTER + struct sk_filter *filter = sk->filter; + if (filter && sk_filter(skb, filter)) + goto discard; +#endif /* CONFIG_FILTER */ + + /* + * This doesn't check if the socket has enough room for the packet. + * Either process the packet _without_ queueing it and then free it, + * or do the check later. 
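The magic constants in tcp_v4_hnd_req() come from reading the fourth 32-bit word of the TCP header (data offset, reserved bits, flag byte, window) as a single network-order value: 0x00040000 has only the RST bit set and 0x00120000 has SYN and ACK set. A small sketch that derives the same masks from the individual flag bits; the macro names here are illustrative, not taken from kernel headers:

    #include <stdint.h>
    #include <arpa/inet.h>

    /* Flag bits of the TCP flag byte (byte 13 of the header). */
    #define TH_FIN 0x01
    #define TH_SYN 0x02
    #define TH_RST 0x04
    #define TH_ACK 0x10

    /* Build the network-order mask used against ((u32 *)th)[3]: the flag
     * byte occupies bits 16..23 of that word in host order. */
    static uint32_t flag_mask(uint8_t flags)
    {
        return htonl((uint32_t)flags << 16);
    }

    /* flag_mask(TH_RST)          == htonl(0x00040000)
     * flag_mask(TH_SYN | TH_ACK) == htonl(0x00120000) */
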
+ */ + skb_set_owner_r(skb, sk); + + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + return 0; + } + + + if (sk->state == TCP_LISTEN) { + struct sock *nsk; + + nsk = tcp_v4_hnd_req(sk, skb); + if (!nsk) + goto discard; + + /* + * Queue it on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket.. + */ + if (atomic_read(&nsk->sock_readers)) { + skb_orphan(skb); + __skb_queue_tail(&nsk->back_log, skb); + return 0; + } + sk = nsk; + } + + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + goto reset; + return 0; + +reset: + tcp_v4_send_reset(skb); +discard: + kfree_skb(skb); + /* Be careful here. If this function gets more complicated and + * gcc suffers from register pressure on the x86, sk (in %ebx) + * might be destroyed here. This current version compiles correctly, + * but you have been warned. + */ + return 0; +} + +/* + * From tcp_input.c + */ + +int tcp_v4_rcv(struct sk_buff *skb, unsigned short len) +{ + struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type!=PACKET_HOST) + goto discard_it; + + th = skb->h.th; + + /* Pull up the IP header. */ + __skb_pull(skb, skb->h.raw - skb->data); + + /* Count it even if it's bad */ + tcp_statistics.TcpInSegs++; + + len = skb->len; + if (len < sizeof(struct tcphdr)) + goto bad_packet; + + /* Try to use the device checksum if provided. */ + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)th, len, 0); + case CHECKSUM_HW: + if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) { + NETDEBUG(printk(KERN_DEBUG "TCPv4 bad checksum " + "from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, " + "len=%d/%d/%d\n", + NIPQUAD(skb->nh.iph->saddr), + ntohs(th->source), + NIPQUAD(skb->nh.iph->daddr), + ntohs(th->dest), + len, skb->len, + ntohs(skb->nh.iph->tot_len))); + bad_packet: + tcp_statistics.TcpInErrs++; + goto discard_it; + } + default: + /* CHECKSUM_UNNECESSARY */ + } + + if((th->doff * 4) < sizeof(struct tcphdr) || + len < (th->doff * 4)) + goto bad_packet; + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (IPCB(skb)->redirport) + sk = tcp_v4_proxy_lookup(th->dest, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, skb->dev, + IPCB(skb)->redirport, skb->dev->ifindex); + else { +#endif + sk = __tcp_v4_lookup(th, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, th->dest, skb->dev->ifindex); +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (!sk) + sk = tcp_v4_search_proxy_openreq(skb); + } +#endif + if (!sk) + goto no_tcp_socket; + if(!ipsec_sk_policy(sk,skb)) + goto discard_it; + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + + skb->used = 0; + + if (sk->state == TCP_TIME_WAIT) + goto do_time_wait; + if (!atomic_read(&sk->sock_readers)) + return tcp_v4_do_rcv(sk, skb); + + __skb_queue_tail(&sk->back_log, skb); + return 0; + +no_tcp_socket: + tcp_v4_send_reset(skb); + +discard_it: + /* Discard frame. */ + kfree_skb(skb); + return 0; + +do_time_wait: + /* Sorry for the ugly switch. 2.3 will have a better solution. 
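The sequence-space bookkeeping in tcp_v4_rcv() charges one sequence number each for SYN and FIN on top of the payload length, which is what the end_seq expression encodes. A restated helper, user-space C with illustrative names:

    #include <stdint.h>

    /* End of the sequence range a segment occupies, matching the end_seq
     * computation in tcp_v4_rcv(): payload bytes plus one for SYN and FIN. */
    static uint32_t segment_end_seq(uint32_t seq, int syn, int fin,
                                    int total_len, int doff_words)
    {
        int payload = total_len - doff_words * 4;   /* strip the TCP header */
        return seq + (syn ? 1 : 0) + (fin ? 1 : 0) + payload;
    }
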
*/ + switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_ACK: + tcp_v4_send_ack(skb, + ((struct tcp_tw_bucket *)sk)->snd_nxt, + ((struct tcp_tw_bucket *)sk)->rcv_nxt, + ((struct tcp_tw_bucket *)sk)->window); + goto discard_it; + case TCP_TW_RST: + goto no_tcp_socket; + default: + goto discard_it; + } +} + +static void __tcp_v4_rehash(struct sock *sk) +{ + struct sock **skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; + + SOCKHASH_LOCK(); + if(sk->pprev) { + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + tcp_reg_zap(sk); + } + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + +int tcp_v4_rebuild_header(struct sock *sk) +{ + struct rtable *rt = (struct rtable *)sk->dst_cache; + __u32 new_saddr; + int want_rewrite = sysctl_ip_dynaddr && sk->state == TCP_SYN_SENT; + + if(rt == NULL) + return 0; + + /* Force route checking if want_rewrite. + * The idea is good, the implementation is disguisting. + * Well, if I made bind on this socket, you cannot randomly ovewrite + * its source address. --ANK + */ + if (want_rewrite) { + int tmp; + struct rtable *new_rt; + __u32 old_saddr = rt->rt_src; + + /* Query new route using another rt buffer */ + tmp = ip_route_connect(&new_rt, rt->rt_dst, 0, + RT_TOS(sk->ip_tos)|sk->localroute, + sk->bound_dev_if); + + /* Only useful if different source addrs */ + if (tmp == 0) { + /* + * Only useful if different source addrs + */ + if (new_rt->rt_src != old_saddr ) { + dst_release(sk->dst_cache); + sk->dst_cache = &new_rt->u.dst; + rt = new_rt; + goto do_rewrite; + } + dst_release(&new_rt->u.dst); + } + } + if (rt->u.dst.obsolete) { + int err; + err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos|RTO_CONN, rt->key.oif); + if (err) { + sk->err_soft=-err; + sk->error_report(sk); + return -1; + } + dst_release(xchg(&sk->dst_cache, &rt->u.dst)); + } + + return 0; + +do_rewrite: + new_saddr = rt->rt_src; + + /* Ouch!, this should not happen. */ + if (!sk->saddr || !sk->rcv_saddr) { + printk(KERN_WARNING "tcp_v4_rebuild_header(): not valid sock addrs: " + "saddr=%08lX rcv_saddr=%08lX\n", + ntohl(sk->saddr), + ntohl(sk->rcv_saddr)); + return 0; + } + + if (new_saddr != sk->saddr) { + if (sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr " + "from %d.%d.%d.%d to %d.%d.%d.%d\n", + NIPQUAD(sk->saddr), + NIPQUAD(new_saddr)); + } + + sk->saddr = new_saddr; + sk->rcv_saddr = new_saddr; + + /* XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. 
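The rehash above relies on the pprev back-pointer idiom used throughout these hash tables: every node stores the address of the pointer that points at it, so unlinking takes constant time with no chain walk. A generic sketch of the same pattern with types invented for the example:

    #include <stddef.h>

    struct node {
        struct node *next;
        struct node **pprev;   /* address of whatever points at this node */
    };

    /* Unlink in O(1): no need to know which bucket or predecessor we hang off. */
    static void node_unlink(struct node *n)
    {
        if (n->pprev) {
            if (n->next)
                n->next->pprev = n->pprev;
            *n->pprev = n->next;
            n->pprev = NULL;
        }
    }

    /* Insert at the head of a bucket, keeping pprev pointers consistent. */
    static void node_add_head(struct node *n, struct node **bucket)
    {
        if ((n->next = *bucket) != NULL)
            (*bucket)->pprev = &n->next;
        *bucket = n;
        n->pprev = bucket;
    }
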
-DaveM + */ + __tcp_v4_rehash(sk); + } + + return 0; +} + +static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th) +{ + return tcp_v4_lookup(skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, th->dest, skb->dev->ifindex); +} + +static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = sk->daddr; + sin->sin_port = sk->dport; +} + +struct tcp_func ipv4_specific = { + ip_queue_xmit, + tcp_v4_send_check, + tcp_v4_rebuild_header, + tcp_v4_conn_request, + tcp_v4_syn_recv_sock, + tcp_v4_get_sock, + sizeof(struct iphdr), + + ip_setsockopt, + ip_getsockopt, + v4_addr2sockaddr, + sizeof(struct sockaddr_in) +}; + +/* NOTE: A lot of things set to zero explicitly by call to + * sk_alloc() so need not be done here. + */ +static int tcp_v4_init_sock(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + skb_queue_head_init(&tp->out_of_order_queue); + tcp_init_xmit_timers(sk); + + tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + tp->mdev = TCP_TIMEOUT_INIT; + tp->mss_clamp = ~0; + + /* So many TCP implementations out there (incorrectly) count the + * initial SYN frame in their delayed-ACK and congestion control + * algorithms that we must have the following bandaid to talk + * efficiently to them. -DaveM + */ + tp->snd_cwnd = 2; + + /* See draft-stevens-tcpca-spec-01 for discussion of the + * initialization of these values. + */ + tp->snd_cwnd_cnt = 0; + tp->snd_ssthresh = 0x7fffffff; /* Infinity */ + + sk->state = TCP_CLOSE; + sk->max_ack_backlog = SOMAXCONN; + tp->rcv_mss = 536; + + sk->write_space = tcp_write_space; + + /* Init SYN queue. */ + tcp_synq_init(tp); + + sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific; + + return 0; +} + +static int tcp_v4_destroy_sock(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + tcp_clear_xmit_timers(sk); + + if (sk->keepopen) + tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); + + /* Cleanup up the write buffer. */ + while((skb = __skb_dequeue(&sk->write_queue)) != NULL) + kfree_skb(skb); + + /* Cleans up our, hopefuly empty, out_of_order_queue. */ + while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL) + kfree_skb(skb); + + /* Clean up a referenced TCP bind bucket, this only happens if a + * port is allocated for a socket, but it never fully connects. 
+ */ + if(sk->prev != NULL) + tcp_put_port(sk); + + return 0; +} + +struct proto tcp_prot = { + (struct sock *)&tcp_prot, /* sklist_next */ + (struct sock *)&tcp_prot, /* sklist_prev */ + tcp_close, /* close */ + tcp_v4_connect, /* connect */ + tcp_accept, /* accept */ + NULL, /* retransmit */ + tcp_write_wakeup, /* write_wakeup */ + tcp_read_wakeup, /* read_wakeup */ + tcp_poll, /* poll */ + tcp_ioctl, /* ioctl */ + tcp_v4_init_sock, /* init */ + tcp_v4_destroy_sock, /* destroy */ + tcp_shutdown, /* shutdown */ + tcp_setsockopt, /* setsockopt */ + tcp_getsockopt, /* getsockopt */ + tcp_v4_sendmsg, /* sendmsg */ + tcp_recvmsg, /* recvmsg */ + NULL, /* bind */ + tcp_v4_do_rcv, /* backlog_rcv */ + tcp_v4_hash, /* hash */ + tcp_v4_unhash, /* unhash */ + tcp_v4_get_port, /* get_port */ + 128, /* max_header */ + 0, /* retransmits */ + "TCP", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ +}; + + + +__initfunc(void tcp_v4_init(struct net_proto_family *ops)) +{ + int err; + + tcp_inode.i_mode = S_IFSOCK; + tcp_inode.i_sock = 1; + tcp_inode.i_uid = 0; + tcp_inode.i_gid = 0; + + tcp_socket->inode = &tcp_inode; + tcp_socket->state = SS_UNCONNECTED; + tcp_socket->type=SOCK_RAW; + + if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0) + panic("Failed to create the TCP control socket.\n"); + tcp_socket->sk->allocation=GFP_ATOMIC; + tcp_socket->sk->num = 256; /* Don't receive any data */ + tcp_socket->sk->ip_ttl = MAXTTL; +} diff --git a/pfinet/linux-src/net/ipv4/tcp_output.c b/pfinet/linux-src/net/ipv4/tcp_output.c new file mode 100644 index 00000000..2ac5e8a2 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/tcp_output.c @@ -0,0 +1,1143 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_output.c,v 1.108.2.1 1999/05/14 23:07:36 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche, <flla@stud.uni-sb.de> + * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> + * Linus Torvalds, <torvalds@cs.helsinki.fi> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Matthew Dillon, <dillon@apollo.west.oic.com> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Jorge Cwik, <jorge@laser.satlink.net> + */ + +/* + * Changes: Pedro Roque : Retransmit queue handled by TCP. + * : Fragmentation on mtu decrease + * : Segment collapse on retransmit + * : AF independence + * + * Linus Torvalds : send_delayed_ack + * David S. Miller : Charge memory using the right skb + * during syn/ack processing. + * David S. Miller : Output engine completely rewritten. + * Andrea Arcangeli: SYNACK carry ts_recent in tsecr. + * + */ + +#include <net/tcp.h> + +extern int sysctl_tcp_timestamps; +extern int sysctl_tcp_window_scaling; +extern int sysctl_tcp_sack; + +/* People can turn this off for buggy TCP's found in printers etc. */ +int sysctl_tcp_retrans_collapse = 1; + +/* Get rid of any delayed acks, we sent one already.. 
*/ +static __inline__ void clear_delayed_acks(struct sock * sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tp->delayed_acks = 0; + if(tcp_in_quickack_mode(tp)) + tcp_exit_quickack_mode(tp); + tcp_clear_xmit_timer(sk, TIME_DACK); +} + +static __inline__ void update_send_head(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + tp->send_head = tp->send_head->next; + if (tp->send_head == (struct sk_buff *) &sk->write_queue) + tp->send_head = NULL; +} + +/* This routine actually transmits TCP packets queued in by + * tcp_do_sendmsg(). This is used by both the initial + * transmission and possible later retransmissions. + * All SKB's seen here are completely headerless. It is our + * job to build the TCP header, and pass the packet down to + * IP so it can do the same plus pass the packet off to the + * device. + * + * We are working here with either a clone of the original + * SKB, or a fresh unique copy made by the retransmit engine. + */ +void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if(skb != NULL) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + int tcp_header_size = tp->tcp_header_len; + struct tcphdr *th; + int sysctl_flags; + +#define SYSCTL_FLAG_TSTAMPS 0x1 +#define SYSCTL_FLAG_WSCALE 0x2 +#define SYSCTL_FLAG_SACK 0x4 + + sysctl_flags = 0; + if(tcb->flags & TCPCB_FLAG_SYN) { + tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; + if(sysctl_tcp_timestamps) { + tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_TSTAMPS; + } + if(sysctl_tcp_window_scaling) { + tcp_header_size += TCPOLEN_WSCALE_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_WSCALE; + } + if(sysctl_tcp_sack) { + sysctl_flags |= SYSCTL_FLAG_SACK; + if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) + tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; + } + } else if(tp->sack_ok && tp->num_sacks) { + /* A SACK is 2 pad bytes, a 2 byte header, plus + * 2 32-bit sequence numbers for each SACK block. + */ + tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + + (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)); + } + th = (struct tcphdr *) skb_push(skb, tcp_header_size); + skb->h.th = th; + skb_set_owner_w(skb, sk); + + /* Build TCP header and checksum it. */ + th->source = sk->sport; + th->dest = sk->dport; + th->seq = htonl(TCP_SKB_CB(skb)->seq); + th->ack_seq = htonl(tp->rcv_nxt); + th->doff = (tcp_header_size >> 2); + th->res1 = 0; + *(((__u8 *)th) + 13) = tcb->flags; + if(!(tcb->flags & TCPCB_FLAG_SYN)) + th->window = htons(tcp_select_window(sk)); + th->check = 0; + th->urg_ptr = ntohs(tcb->urg_ptr); + if(tcb->flags & TCPCB_FLAG_SYN) { + /* RFC1323: The window in SYN & SYN/ACK segments + * is never scaled. + */ + th->window = htons(tp->rcv_wnd); + tcp_syn_build_options((__u32 *)(th + 1), tp->mss_clamp, + (sysctl_flags & SYSCTL_FLAG_TSTAMPS), + (sysctl_flags & SYSCTL_FLAG_SACK), + (sysctl_flags & SYSCTL_FLAG_WSCALE), + tp->rcv_wscale, + TCP_SKB_CB(skb)->when, + tp->ts_recent); + } else { + tcp_build_and_update_options((__u32 *)(th + 1), + tp, TCP_SKB_CB(skb)->when); + } + tp->af_specific->send_check(sk, th, skb->len, skb); + + clear_delayed_acks(sk); + tp->last_ack_sent = tp->rcv_nxt; + tcp_statistics.TcpOutSegs++; + tp->af_specific->queue_xmit(skb); + } +#undef SYSCTL_FLAG_TSTAMPS +#undef SYSCTL_FLAG_WSCALE +#undef SYSCTL_FLAG_SACK +} + +/* This is the main buffer sending routine. We queue the buffer + * and decide whether to queue or transmit now. 
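The header-size arithmetic in tcp_transmit_skb() assumes every option group is padded to a 32-bit boundary; the SYN's MSS option, for instance, is emitted as a single aligned word of kind, length and value. A sketch of that packing for the MSS option alone (the kernel's tcp_syn_build_options() handles the full set; this is only an illustration using the standard option codepoints):

    #include <stdint.h>
    #include <arpa/inet.h>

    #define TCPOPT_MSS   2
    #define TCPOLEN_MSS  4

    /* Emit the MSS option as one 32-bit word: kind, length, 16-bit value.
     * This is the layout the aligned TCPOLEN_* constants account for. */
    static void write_mss_option(uint32_t *opt_ptr, uint16_t mss)
    {
        *opt_ptr = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
    }
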
+ */ +void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Advance write_seq and place onto the write_queue. */ + tp->write_seq += (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq); + __skb_queue_tail(&sk->write_queue, skb); + + if (!force_queue && tp->send_head == NULL && tcp_snd_test(sk, skb)) { + /* Send it out now. */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + if(!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } else { + /* Queue it, remembering where we must start sending. */ + if (tp->send_head == NULL) + tp->send_head = skb; + if (!force_queue && tp->packets_out == 0 && !tp->pending) { + tp->pending = TIME_PROBE0; + tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto); + } + } +} + +/* Function to create two new TCP segments. Shrinks the given segment + * to the specified size and appends a new segment with the rest of the + * packet to the list. This won't be called frequently, I hope. + * Remember, these are still headerless SKBs at this point. + */ +static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) +{ + struct sk_buff *buff; + int nsize = skb->len - len; + u16 flags; + + /* Get a new skb... force flag on. */ + buff = sock_wmalloc(sk, + (nsize + MAX_HEADER + sk->prot->max_header), + 1, GFP_ATOMIC); + if (buff == NULL) + return -1; /* We'll just try again later. */ + + /* Reserve space for headers. */ + skb_reserve(buff, MAX_HEADER + sk->prot->max_header); + + /* Correct the sequence numbers. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; + + /* PSH and FIN should only be set in the second packet. */ + flags = TCP_SKB_CB(skb)->flags; + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); + if(flags & TCPCB_FLAG_URG) { + u16 old_urg_ptr = TCP_SKB_CB(skb)->urg_ptr; + + /* Urgent data is always a pain in the ass. */ + if(old_urg_ptr > len) { + TCP_SKB_CB(skb)->flags &= ~(TCPCB_FLAG_URG); + TCP_SKB_CB(skb)->urg_ptr = 0; + TCP_SKB_CB(buff)->urg_ptr = old_urg_ptr - len; + } else { + flags &= ~(TCPCB_FLAG_URG); + } + } + if(!(flags & TCPCB_FLAG_URG)) + TCP_SKB_CB(buff)->urg_ptr = 0; + TCP_SKB_CB(buff)->flags = flags; + TCP_SKB_CB(buff)->sacked = 0; + + /* Copy and checksum data tail into the new buffer. */ + buff->csum = csum_partial_copy(skb->data + len, skb_put(buff, nsize), + nsize, 0); + + /* This takes care of the FIN sequence number too. */ + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + skb_trim(skb, len); + + /* Rechecksum original buffer. */ + skb->csum = csum_partial(skb->data, skb->len, 0); + + /* Looks stupid, but our code really uses when of + * skbs, which it never sent before. --ANK + */ + TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; + + /* Link BUFF into the send queue. */ + __skb_append(skb, buff); + + return 0; +} + +/* This function synchronize snd mss to current pmtu/exthdr set. + + tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts + for TCP options, but includes only bare TCP header. + + tp->mss_clamp is mss negotiated at connection setup. + It is minumum of user_mss and mss received with SYN. + It also does not include TCP options. + + tp->pmtu_cookie is last pmtu, seen by this function. + + tp->mss_cache is current effective sending mss, including + all tcp options except for SACKs. 
It is evaluated, + taking into account current pmtu, but never exceeds + tp->mss_clamp. + + NOTE1. rfc1122 clearly states that advertised MSS + DOES NOT include either tcp or ip options. + + NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside + this function. --ANK (980731) + */ + +int tcp_sync_mss(struct sock *sk, u32 pmtu) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int mss_now; + + /* Calculate base mss without TCP options: + It is MMS_S - sizeof(tcphdr) of rfc1122 + */ + mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + + /* Clamp it (mss_clamp does not include tcp options) */ + if (mss_now > tp->mss_clamp) + mss_now = tp->mss_clamp; + + /* Now subtract TCP options size, not including SACKs */ + mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); + + /* Now subtract optional transport overhead */ + mss_now -= tp->ext_header_len; + + /* It we got too small (or even negative) value, + clamp it by 8 from below. Why 8 ? + Well, it could be 1 with the same success, + but if IP accepted segment of length 1, + it would love 8 even more 8) --ANK (980731) + */ + if (mss_now < 8) + mss_now = 8; + + /* And store cached results */ + tp->pmtu_cookie = pmtu; + tp->mss_cache = mss_now; + return mss_now; +} + + +/* This routine writes packets to the network. It advances the + * send_head. This happens as incoming acks open up the remote + * window for us. + */ +void tcp_write_xmit(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int mss_now; + + /* Account for SACKS, we may need to fragment due to this. + * It is just like the real MSS changing on us midstream. + * We also handle things correctly when the user adds some + * IP options mid-stream. Silly to do, but cover it. + */ + mss_now = tcp_current_mss(sk); + + /* If we are zapped, the bytes will have to remain here. + * In time closedown will empty the write queue and all + * will be happy. + */ + if(!sk->zapped) { + struct sk_buff *skb; + int sent_pkts = 0; + + /* Anything on the transmit queue that fits the window can + * be added providing we are: + * + * a) following SWS avoidance [and Nagle algorithm] + * b) not exceeding our congestion window. + * c) not retransmitting [Nagle] + */ + while((skb = tp->send_head) && tcp_snd_test(sk, skb)) { + if (skb->len > mss_now) { + if (tcp_fragment(sk, skb, mss_now)) + break; + } + + /* Advance the send_head. This one is going out. */ + update_send_head(sk); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); + sent_pkts = 1; + } + + /* If we sent anything, make sure the retransmit + * timer is active. + */ + if (sent_pkts && !tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } +} + +/* This function returns the amount that we can raise the + * usable window based on the following constraints + * + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket + * + * RFC 1122: + * "the suggested [SWS] avoidance algorithm for the receiver is to keep + * RECV.NEXT + RCV.WIN fixed until: + * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" + * + * i.e. don't raise the right edge of the window until you can raise + * it at least MSS bytes. + * + * Unfortunately, the recommended algorithm breaks header prediction, + * since header prediction assumes th->window stays fixed. 
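Concretely, for an Ethernet path MTU of 1500 with timestamps negotiated and no IP options, the arithmetic in tcp_sync_mss() gives 1500 - 20 (IP) - 20 (TCP) - 12 (aligned timestamp option) = 1448 bytes per segment. A user-space restatement with the same clamping and floor; parameter names are illustrative:

    /* Effective sending MSS as computed by tcp_sync_mss(), ignoring SACK blocks.
     * net_hdr: network-layer header length (20 for IPv4 without options).
     * tcp_hdr_len: full TCP header incl. fixed options (32 with timestamps).
     * ext_hdr: extra transport overhead such as IP options, usually 0. */
    static int effective_mss(int pmtu, int net_hdr, int tcp_hdr_len,
                             int ext_hdr, int mss_clamp)
    {
        int mss = pmtu - net_hdr - 20;      /* bare TCP header is 20 bytes */
        if (mss > mss_clamp)
            mss = mss_clamp;
        mss -= tcp_hdr_len - 20;            /* fixed TCP options */
        mss -= ext_hdr;
        return mss < 8 ? 8 : mss;           /* same floor as the code above */
    }
    /* effective_mss(1500, 20, 32, 0, 65535) == 1448 */
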
+ * + * Strictly speaking, keeping th->window fixed violates the receiver + * side SWS prevention criteria. The problem is that under this rule + * a stream of single byte packets will cause the right side of the + * window to always advance by a single byte. + * + * Of course, if the sender implements sender side SWS prevention + * then this will not be a problem. + * + * BSD seems to make the following compromise: + * + * If the free space is less than the 1/4 of the maximum + * space available and the free space is less than 1/2 mss, + * then set the window to 0. + * Otherwise, just prevent the window from shrinking + * and from being larger than the largest representable value. + * + * This prevents incremental opening of the window in the regime + * where TCP is limited by the speed of the reader side taking + * data out of the TCP receive queue. It does nothing about + * those cases where the window is constrained on the sender side + * because the pipeline is full. + * + * BSD also seems to "accidentally" limit itself to windows that are a + * multiple of MSS, at least until the free space gets quite small. + * This would appear to be a side effect of the mbuf implementation. + * Combining these two algorithms results in the observed behavior + * of having a fixed window size at almost all times. + * + * Below we obtain similar behavior by forcing the offered window to + * a multiple of the mss when it is feasible to do so. + * + * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. + */ +u32 __tcp_select_window(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + unsigned int mss = tp->mss_cache; + int free_space; + u32 window; + + /* Sometimes free_space can be < 0. */ + free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2; + if (tp->window_clamp) { + if (free_space > ((int) tp->window_clamp)) + free_space = tp->window_clamp; + mss = min(tp->window_clamp, mss); + } else { + printk("tcp_select_window: tp->window_clamp == 0.\n"); + } + + if (mss < 1) { + mss = 1; + printk("tcp_select_window: sk->mss fell to 0.\n"); + } + + if ((free_space < (sk->rcvbuf/4)) && (free_space < ((int) (mss/2)))) { + window = 0; + tp->pred_flags = 0; + } else { + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. + * If our current window offering is within 1 mss of the + * free space we just keep it. This prevents the divide + * and multiply from happening most of the time. + * We also don't do any window rounding when the free space + * is too small. + */ + window = tp->rcv_wnd; + if ((((int) window) <= (free_space - ((int) mss))) || + (((int) window) > free_space)) + window = (((unsigned int) free_space)/mss)*mss; + } + return window; +} + +/* Attempt to collapse two adjacent SKB's during retransmission. */ +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) +{ + struct sk_buff *next_skb = skb->next; + + /* The first test we must make is that neither of these two + * SKB's are still referenced by someone else. + */ + if(!skb_cloned(skb) && !skb_cloned(next_skb)) { + int skb_size = skb->len, next_skb_size = next_skb->len; + u16 flags = TCP_SKB_CB(skb)->flags; + + /* Punt if the first SKB has URG set. */ + if(flags & TCPCB_FLAG_URG) + return; + + /* Also punt if next skb has been SACK'd. 
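The BSD-style compromise described above reduces to: advertise zero when free space is below both a quarter of the receive buffer and half an MSS; otherwise keep the current offer if it is within one MSS of free space, and only then round free space down to a multiple of the MSS. A condensed user-space sketch of __tcp_select_window(); window clamping and pred_flags handling are omitted:

    /* free_space is half the unused receive buffer, window the value
     * currently advertised to the peer. */
    static unsigned int select_window(int free_space, int rcvbuf,
                                      unsigned int mss, unsigned int window)
    {
        if (free_space < rcvbuf / 4 && free_space < (int)(mss / 2))
            return 0;                                   /* shut the window */

        if ((int)window <= free_space - (int)mss || (int)window > free_space)
            window = ((unsigned int)free_space / mss) * mss;  /* round to MSS */

        return window;                                  /* else keep the offer */
    }
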
*/ + if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) + return; + + /* Punt if not enough space exists in the first SKB for + * the data in the second, or the total combined payload + * would exceed the MSS. + */ + if ((next_skb_size > skb_tailroom(skb)) || + ((skb_size + next_skb_size) > mss_now)) + return; + + /* Ok. We will be able to collapse the packet. */ + __skb_unlink(next_skb, next_skb->list); + + if(skb->len % 4) { + /* Must copy and rechecksum all data. */ + memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); + skb->csum = csum_partial(skb->data, skb->len, 0); + } else { + /* Optimize, actually we could also combine next_skb->csum + * to skb->csum using a single add w/carry operation too. + */ + skb->csum = csum_partial_copy(next_skb->data, + skb_put(skb, next_skb_size), + next_skb_size, skb->csum); + } + + /* Update sequence range on original skb. */ + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; + + /* Merge over control information. */ + flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ + if(flags & TCPCB_FLAG_URG) { + u16 urgptr = TCP_SKB_CB(next_skb)->urg_ptr; + TCP_SKB_CB(skb)->urg_ptr = urgptr + skb_size; + } + TCP_SKB_CB(skb)->flags = flags; + + /* All done, get rid of second SKB and account for it so + * packet counting does not break. + */ + kfree_skb(next_skb); + sk->tp_pinfo.af_tcp.packets_out--; + } +} + +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb, *old_next_skb; + unsigned int mss = tcp_current_mss(sk); + + /* Don't muck with the congestion window here. */ + tp->dup_acks = 0; + tp->high_seq = tp->snd_nxt; + tp->retrans_head = NULL; + + /* Input control flow will see that this was retransmitted + * and not use it for RTT calculation in the absence of + * the timestamp option. + */ + for (old_next_skb = skb = skb_peek(&sk->write_queue); + ((skb != tp->send_head) && + (skb != (struct sk_buff *)&sk->write_queue)); + skb = skb->next) { + int resend_skb = 0; + + /* Our goal is to push out the packets which we + * sent already, but are being chopped up now to + * account for the PMTU information we have. + * + * As we resend the queue, packets are fragmented + * into two pieces, and when we try to send the + * second piece it may be collapsed together with + * a subsequent packet, and so on. -DaveM + */ + if (old_next_skb != skb || skb->len > mss) + resend_skb = 1; + old_next_skb = skb->next; + if (resend_skb != 0) + tcp_retransmit_skb(sk, skb); + } +} + +static __inline__ void update_retrans_head(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + tp->retrans_head = tp->retrans_head->next; + if((tp->retrans_head == tp->send_head) || + (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) { + tp->retrans_head = NULL; + tp->rexmt_done = 1; + } +} + +/* This retransmits one SKB. Policy decisions and retransmit queue + * state updates are done by the caller. Returns non-zero if an + * error occurred which prevented the send. + */ +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int cur_mss = tcp_current_mss(sk); + + if(skb->len > cur_mss) { + if(tcp_fragment(sk, skb, cur_mss)) + return 1; /* We'll try again later. */ + + /* New SKB created, account for it. 
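Collapsing two retransmit-queue segments is only safe when both buffers are privately owned and the merge neither overruns the first buffer's tailroom nor exceeds the MSS; URG-bearing and already-SACK'd segments are left alone. A checklist-style restatement with invented predicate names:

    /* Conditions under which two adjacent retransmit-queue segments may be
     * merged, mirroring tcp_retrans_try_collapse(). */
    static int can_collapse(int a_is_cloned, int b_is_cloned,
                            int a_has_urg, int b_is_sacked,
                            int b_len, int a_tailroom,
                            int a_len, int mss_now)
    {
        if (a_is_cloned || b_is_cloned) return 0;  /* someone else holds a ref */
        if (a_has_urg)                  return 0;  /* urgent data: punt */
        if (b_is_sacked)                return 0;  /* receiver already has it */
        if (b_len > a_tailroom)         return 0;  /* no room to copy the data */
        if (a_len + b_len > mss_now)    return 0;  /* merge would exceed MSS */
        return 1;
    }
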
*/ + tp->packets_out++; + } + + /* Collapse two adjacent packets if worthwhile and we can. */ + if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && + (skb->len < (cur_mss >> 1)) && + (skb->next != tp->send_head) && + (skb->next != (struct sk_buff *)&sk->write_queue) && + (sysctl_tcp_retrans_collapse != 0)) + tcp_retrans_try_collapse(sk, skb, cur_mss); + + if(tp->af_specific->rebuild_header(sk)) + return 1; /* Routing failure or similar. */ + + /* Some Solaris stacks overoptimize and ignore the FIN on a + * retransmit when old data is attached. So strip it off + * since it is cheap to do so and saves bytes on the network. + */ + if(skb->len > 0 && + (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { + TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; + skb_trim(skb, 0); + skb->csum = 0; + } + + /* Ok, we're gonna send it out, update state. */ + TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS; + tp->retrans_out++; + + /* Make a copy, if the first transmission SKB clone we made + * is still in somebody's hands, else make a clone. + */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; + if(skb_cloned(skb)) + skb = skb_copy(skb, GFP_ATOMIC); + else + skb = skb_clone(skb, GFP_ATOMIC); + + tcp_transmit_skb(sk, skb); + + /* Update global TCP statistics and return success. */ + sk->prot->retransmits++; + tcp_statistics.TcpRetransSegs++; + + return 0; +} + +/* This gets called after a retransmit timeout, and the initially + * retransmitted data is acknowledged. It tries to continue + * resending the rest of the retransmit queue, until either + * we've sent it all or the congestion window limit is reached. + * If doing SACK, the first ACK which comes back for a timeout + * based retransmit packet might feed us FACK information again. + * If so, we use it to avoid unnecessarily retransmissions. + */ +void tcp_xmit_retransmit_queue(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + if (tp->retrans_head == NULL && + tp->rexmt_done == 0) + tp->retrans_head = skb_peek(&sk->write_queue); + if (tp->retrans_head == tp->send_head) + tp->retrans_head = NULL; + + /* Each time, advance the retrans_head if we got + * a packet out or we skipped one because it was + * SACK'd. -DaveM + */ + while ((skb = tp->retrans_head) != NULL) { + /* If it has been ack'd by a SACK block, we don't + * retransmit it. + */ + if(!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + /* Send it out, punt if error occurred. */ + if(tcp_retransmit_skb(sk, skb)) + break; + + update_retrans_head(sk); + + /* Stop retransmitting if we've hit the congestion + * window limit. + */ + if (tp->retrans_out >= tp->snd_cwnd) + break; + } else { + update_retrans_head(sk); + } + } +} + +/* Using FACK information, retransmit all missing frames at the receiver + * up to the forward most SACK'd packet (tp->fackets_out) if the packet + * has not been retransmitted already. + */ +void tcp_fack_retransmit(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb = skb_peek(&sk->write_queue); + int packet_cnt = 0; + + while((skb != NULL) && + (skb != tp->send_head) && + (skb != (struct sk_buff *)&sk->write_queue)) { + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS)) + goto next_packet; + + /* Ok, retransmit it. 
*/ + if(tcp_retransmit_skb(sk, skb)) + break; + + if(tcp_packets_in_flight(tp) >= tp->snd_cwnd) + break; +next_packet: + packet_cnt++; + if(packet_cnt >= tp->fackets_out) + break; + skb = skb->next; + } +} + +/* Send a fin. The caller locks the socket for us. This cannot be + * allowed to fail queueing a FIN frame under any circumstances. + */ +void tcp_send_fin(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb = skb_peek_tail(&sk->write_queue); + unsigned int mss_now; + + /* Optimization, tack on the FIN if we have a queue of + * unsent frames. But be careful about outgoing SACKS + * and IP options. + */ + mss_now = tcp_current_mss(sk); + + if((tp->send_head != NULL) && (skb->len < mss_now)) { + /* tcp_write_xmit() takes care of the rest. */ + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; + TCP_SKB_CB(skb)->end_seq++; + tp->write_seq++; + + /* Special case to avoid Nagle bogosity. If this + * segment is the last segment, and it was queued + * due to Nagle/SWS-avoidance, send it out now. + */ + if(tp->send_head == skb && + !sk->nonagle && + skb->len < (tp->mss_cache >> 1) && + tp->packets_out && + !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG)) { + update_send_head(sk); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); + if(!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } + } else { + /* Socket is locked, keep trying until memory is available. */ + do { + skb = sock_wmalloc(sk, + (MAX_HEADER + + sk->prot->max_header), + 1, GFP_KERNEL); + } while (skb == NULL); + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->urg_ptr = 0; + + /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */ + TCP_SKB_CB(skb)->seq = tp->write_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + tcp_send_skb(sk, skb, 0); + } +} + +/* We get here when a process closes a file descriptor (either due to + * an explicit close() or as a byproduct of exit()'ing) and there + * was unread data in the receive queue. This behavior is recommended + * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM + */ +void tcp_send_active_reset(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + /* NOTE: No TCP options attached and we never retransmit this. */ + skb = alloc_skb(MAX_HEADER + sk->prot->max_header, GFP_KERNEL); + if (!skb) + return; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->urg_ptr = 0; + + /* Send it off. */ + TCP_SKB_CB(skb)->seq = tp->write_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_transmit_skb(sk, skb); +} + +/* WARNING: This routine must only be called when we have already sent + * a SYN packet that crossed the incoming SYN that caused this routine + * to get called. If this assumption fails then the initial rcv_wnd + * and rcv_wscale values will not be correct. 
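tcp_send_fin() above has two paths: when an unsent segment is still queued and has room under the MSS, the FIN flag is folded into it; otherwise a dedicated FIN segment is allocated, looping until memory is available because queueing the FIN must not fail. Either way the FIN itself consumes one sequence number. A compact restatement of the choice, with invented names:

    /* Whether a FIN can be folded into the last queued, still-unsent
     * segment, as in tcp_send_fin(). */
    static int fin_piggybacks(int have_unsent_head, int tail_len, int mss_now)
    {
        return have_unsent_head && tail_len < mss_now;
    }
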
+ */ +int tcp_send_synack(struct sock *sk) +{ + struct tcp_opt* tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff* skb; + + skb = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header), + 1, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_SYN); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->urg_ptr = 0; + + /* SYN eats a sequence byte. */ + TCP_SKB_CB(skb)->seq = tp->snd_una; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + __skb_queue_tail(&sk->write_queue, skb); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); + return 0; +} + +/* + * Prepare a SYN-ACK. + */ +struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, + struct open_request *req, int mss) +{ + struct tcphdr *th; + int tcp_header_size; + struct sk_buff *skb; + + skb = sock_wmalloc(sk, MAX_HEADER + sk->prot->max_header, 1, GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + + skb->dst = dst_clone(dst); + + /* Don't offer more than they did. + * This way we don't have to memorize who said what. + * FIXME: maybe this should be changed for better performance + * with syncookies. + */ + req->mss = min(mss, req->mss); + if (req->mss < 8) { + printk(KERN_DEBUG "initial req->mss below 8\n"); + req->mss = 8; + } + + tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + + (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + + (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + + /* SACK_PERM is in the place of NOP NOP of TS */ + ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); + + memset(th, 0, sizeof(struct tcphdr)); + th->syn = 1; + th->ack = 1; + th->source = sk->sport; + th->dest = req->rmt_port; + TCP_SKB_CB(skb)->seq = req->snt_isn; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + th->seq = htonl(TCP_SKB_CB(skb)->seq); + th->ack_seq = htonl(req->rcv_isn + 1); + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ + __u8 rcv_wscale; + /* Set this up on the first call only */ + req->window_clamp = skb->dst->window; + tcp_select_initial_window(sock_rspace(sk)/2,req->mss, + &req->rcv_wnd, + &req->window_clamp, + req->wscale_ok, + &rcv_wscale); + req->rcv_wscale = rcv_wscale; + } + + /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ + th->window = htons(req->rcv_wnd); + + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok, + req->sack_ok, req->wscale_ok, req->rcv_wscale, + TCP_SKB_CB(skb)->when, + req->ts_recent); + + skb->csum = 0; + th->doff = (tcp_header_size >> 2); + tcp_statistics.TcpOutSegs++; + return skb; +} + +void tcp_connect(struct sock *sk, struct sk_buff *buff, int mtu) +{ + struct dst_entry *dst = sk->dst_cache; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Reserve space for headers. */ + skb_reserve(buff, MAX_HEADER + sk->prot->max_header); + + tp->snd_wnd = 0; + tp->snd_wl1 = 0; + tp->snd_wl2 = tp->write_seq; + tp->snd_una = tp->write_seq; + tp->rcv_nxt = 0; + + sk->err = 0; + + /* We'll fix this up when we get a response from the other end. + * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. 
+ */ + tp->tcp_header_len = sizeof(struct tcphdr) + + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + + /* If user gave his TCP_MAXSEG, record it to clamp */ + if (tp->user_mss) + tp->mss_clamp = tp->user_mss; + tcp_sync_mss(sk, mtu); + + /* Now unpleasant action: if initial pmtu is too low + set lower clamp. I am not sure that it is good. + To be more exact, I do not think that clamping at value, which + is apparently transient and may improve in future is good idea. + It would be better to wait until peer will returns its MSS + (probably 65535 too) and now advertise something sort of 65535 + or at least first hop device mtu. Is it clear, what I mean? + We should tell peer what maximal mss we expect to RECEIVE, + it has nothing to do with pmtu. + I am afraid someone will be confused by such huge value. + --ANK (980731) + */ + if (tp->mss_cache + tp->tcp_header_len - sizeof(struct tcphdr) < tp->mss_clamp ) + tp->mss_clamp = tp->mss_cache + tp->tcp_header_len - sizeof(struct tcphdr); + + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; + TCP_SKB_CB(buff)->sacked = 0; + TCP_SKB_CB(buff)->urg_ptr = 0; + buff->csum = 0; + TCP_SKB_CB(buff)->seq = tp->write_seq++; + TCP_SKB_CB(buff)->end_seq = tp->write_seq; + tp->snd_nxt = TCP_SKB_CB(buff)->end_seq; + + tp->window_clamp = dst->window; + tcp_select_initial_window(sock_rspace(sk)/2,tp->mss_clamp, + &tp->rcv_wnd, + &tp->window_clamp, + sysctl_tcp_window_scaling, + &tp->rcv_wscale); + /* Ok, now lock the socket before we make it visible to + * the incoming packet engine. + */ + lock_sock(sk); + + /* Socket identity change complete, no longer + * in TCP_CLOSE, so enter ourselves into the + * hash tables. + */ + tcp_set_state(sk,TCP_SYN_SENT); + sk->prot->hash(sk); + + tp->rto = dst->rtt; + tcp_init_xmit_timers(sk); + tp->retransmits = 0; + tp->fackets_out = 0; + tp->retrans_out = 0; + + /* Send it off. */ + __skb_queue_tail(&sk->write_queue, buff); + TCP_SKB_CB(buff)->when = tcp_time_stamp; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); + tcp_statistics.TcpActiveOpens++; + + /* Timer for repeating the SYN until an answer. */ + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + + /* Now, it is safe to release the socket. */ + release_sock(sk); +} + +/* Send out a delayed ack, the caller does the policy checking + * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() + * for details. + */ +void tcp_send_delayed_ack(struct tcp_opt *tp, int max_timeout) +{ + unsigned long timeout; + + /* Stay within the limit we were given */ + timeout = tp->ato; + if (timeout > max_timeout) + timeout = max_timeout; + timeout += jiffies; + + /* Use new timeout only if there wasn't a older one earlier. */ + if (!tp->delack_timer.prev) { + tp->delack_timer.expires = timeout; + add_timer(&tp->delack_timer); + } else { + if (time_before(timeout, tp->delack_timer.expires)) + mod_timer(&tp->delack_timer, timeout); + } +} + +/* This routine sends an ack and also updates the window. */ +void tcp_send_ack(struct sock *sk) +{ + /* If we have been reset, we may not send again. */ + if(!sk->zapped) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *buff; + + /* We are not putting this on the write queue, so + * tcp_transmit_skb() will set the ownership to this + * sock. + */ + buff = alloc_skb(MAX_HEADER + sk->prot->max_header, GFP_ATOMIC); + if (buff == NULL) { + /* Force it to send an ack. 
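The delayed-ACK scheduler above clamps the ack timeout to the caller's maximum and, if a timer is already pending, only ever moves the deadline earlier, never later. A sketch of that policy using plain integers for jiffies and ignoring tick wraparound; names are illustrative:

    /* Choose the expiry for the delayed-ACK timer, as in tcp_send_delayed_ack().
     * Returns the deadline to program; a pending timer is only moved up. */
    static unsigned long delack_expiry(unsigned long now, unsigned long ato,
                                       unsigned long max_timeout,
                                       int pending, unsigned long current_expiry)
    {
        unsigned long timeout = ato > max_timeout ? max_timeout : ato;
        timeout += now;

        if (pending && current_expiry <= timeout)
            return current_expiry;      /* keep the earlier deadline */
        return timeout;                 /* arm (or re-arm) for the new one */
    }
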
We don't have to do this + * (ACK is unreliable) but it's much better use of + * bandwidth on slow links to send a spare ack than + * resend packets. + * + * This is the one possible way that we can delay an + * ACK and have tp->ato indicate that we are in + * quick ack mode, so clear it. + */ + if(tcp_in_quickack_mode(tp)) + tcp_exit_quickack_mode(tp); + tcp_send_delayed_ack(tp, HZ/2); + return; + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(buff, MAX_HEADER + sk->prot->max_header); + buff->csum = 0; + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(buff)->sacked = 0; + TCP_SKB_CB(buff)->urg_ptr = 0; + + /* Send it off, this clears delayed acks for us. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tp->snd_nxt; + TCP_SKB_CB(buff)->when = tcp_time_stamp; + tcp_transmit_skb(sk, buff); + } +} + +/* This routine sends a packet with an out of date sequence + * number. It assumes the other end will try to ack it. + */ +void tcp_write_wakeup(struct sock *sk) +{ + /* After a valid reset we can send no more. */ + if (!sk->zapped) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *skb; + + /* Write data can still be transmitted/retransmitted in the + * following states. If any other state is encountered, return. + * [listen/close will never occur here anyway] + */ + if ((1 << sk->state) & + ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1| + TCPF_LAST_ACK|TCPF_CLOSING)) + return; + + if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && + ((skb = tp->send_head) != NULL)) { + unsigned long win_size; + + /* We are probing the opening of a window + * but the window size is != 0 + * must have been a result SWS avoidance ( sender ) + */ + win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una); + if (win_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) { + if (tcp_fragment(sk, skb, win_size)) + return; /* Let a retransmit get it. */ + } + update_send_head(sk); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tp->packets_out++; + tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); + if (!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } else { + /* We don't queue it, tcp_transmit_skb() sets ownership. */ + skb = alloc_skb(MAX_HEADER + sk->prot->max_header, + GFP_ATOMIC); + if (skb == NULL) + return; + + /* Reserve space for headers and set control bits. */ + skb_reserve(skb, MAX_HEADER + sk->prot->max_header); + skb->csum = 0; + TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->urg_ptr = 0; + + /* Use a previous sequence. This should cause the other + * end to send an ack. Don't queue or clone SKB, just + * send it. + */ + TCP_SKB_CB(skb)->seq = tp->snd_nxt - 1; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_transmit_skb(sk, skb); + } + } +} + +/* A window probe timeout has occurred. If window is not closed send + * a partial packet else a zero probe. 
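The zero-window probe built by tcp_write_wakeup() deliberately carries an old sequence number, one behind snd_nxt, with no data: the receiver discards it but is forced to reply with an ACK advertising its current window. tcp_send_probe0() below then backs the probe interval off exponentially up to a cap. A small sketch of both points; HZ is assumed to be 100 for the example and is not taken from this source:

    #include <stdint.h>

    #define HZ 100   /* assumed tick rate for the example */

    /* Sequence number carried by a zero-window probe: one byte behind
     * snd_nxt, so the segment is stale but still provokes an ACK. */
    static uint32_t probe_seq(uint32_t snd_nxt)
    {
        return snd_nxt - 1;
    }

    /* Interval until the next probe, as in tcp_send_probe0():
     * exponential backoff capped at 120 seconds. */
    static unsigned long probe_timeout(unsigned long rto, int backoff)
    {
        unsigned long t = rto << backoff;
        return t > 120 * HZ ? 120 * HZ : t;
    }
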
+ */ +void tcp_send_probe0(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tcp_write_wakeup(sk); + tp->pending = TIME_PROBE0; + tp->backoff++; + tp->probes_out++; + tcp_reset_xmit_timer (sk, TIME_PROBE0, + min(tp->rto << tp->backoff, 120*HZ)); +} diff --git a/pfinet/linux-src/net/ipv4/tcp_timer.c b/pfinet/linux-src/net/ipv4/tcp_timer.c new file mode 100644 index 00000000..21029f8e --- /dev/null +++ b/pfinet/linux-src/net/ipv4/tcp_timer.c @@ -0,0 +1,595 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Version: $Id: tcp_timer.c,v 1.62.2.3 1999/06/20 20:14:30 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche, <flla@stud.uni-sb.de> + * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> + * Linus Torvalds, <torvalds@cs.helsinki.fi> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Matthew Dillon, <dillon@apollo.west.oic.com> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Jorge Cwik, <jorge@laser.satlink.net> + */ + +#include <net/tcp.h> + +int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; +int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; +int sysctl_tcp_retries1 = TCP_RETR1; +int sysctl_tcp_retries2 = TCP_RETR2; + +static void tcp_sltimer_handler(unsigned long); +static void tcp_syn_recv_timer(unsigned long); +static void tcp_keepalive(unsigned long data); +static void tcp_twkill(unsigned long); + +struct timer_list tcp_slow_timer = { + NULL, NULL, + 0, 0, + tcp_sltimer_handler, +}; + + +struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { + {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */ + {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}, /* KEEPALIVE */ + {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill} /* TWKILL */ +}; + +const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish use just one timer maintaining a list of expire jiffies + * to optimize. + */ + +void tcp_init_xmit_timers(struct sock *sk) +{ + init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer); + sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer; + sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk; + + init_timer(&sk->tp_pinfo.af_tcp.delack_timer); + sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer; + sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk; + + init_timer(&sk->tp_pinfo.af_tcp.probe_timer); + sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer; + sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk; +} + +/* + * Reset the retransmission timer + */ + +void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + switch (what) { + case TIME_RETRANS: + /* When seting the transmit timer the probe timer + * should not be set. + * The delayed ack timer can be set if we are changing the + * retransmit timer when removing acked frames. 
+ */ + if(tp->probe_timer.prev) + del_timer(&tp->probe_timer); + mod_timer(&tp->retransmit_timer, jiffies+when); + break; + + case TIME_DACK: + mod_timer(&tp->delack_timer, jiffies+when); + break; + + case TIME_PROBE0: + mod_timer(&tp->probe_timer, jiffies+when); + break; + + case TIME_WRITE: + printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n"); + break; + + default: + printk(KERN_DEBUG "bug: unknown timer value\n"); + }; +} + +void tcp_clear_xmit_timers(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if(tp->retransmit_timer.prev) + del_timer(&tp->retransmit_timer); + if(tp->delack_timer.prev) + del_timer(&tp->delack_timer); + if(tp->probe_timer.prev) + del_timer(&tp->probe_timer); +} + +static int tcp_write_err(struct sock *sk, int force) +{ + sk->err = sk->err_soft ? sk->err_soft : ETIMEDOUT; + sk->error_report(sk); + + tcp_clear_xmit_timers(sk); + + /* Time wait the socket. */ + if (!force && ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING))) { + tcp_time_wait(sk); + } else { + /* Clean up time. */ + tcp_set_state(sk, TCP_CLOSE); + return 0; + } + return 1; +} + +/* A write timeout has occurred. Process the after effects. */ +static int tcp_write_timeout(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Look for a 'soft' timeout. */ + if ((sk->state == TCP_ESTABLISHED && + tp->retransmits && (tp->retransmits % TCP_QUICK_TRIES) == 0) || + (sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) { + dst_negative_advice(&sk->dst_cache); + } + + /* Have we tried to SYN too many times (repent repent 8)) */ + if(tp->retransmits > sysctl_tcp_syn_retries && sk->state==TCP_SYN_SENT) { + tcp_write_err(sk, 1); + /* Don't FIN, we got nothing back */ + return 0; + } + + /* Has it gone just too far? */ + if (tp->retransmits > sysctl_tcp_retries2) + return tcp_write_err(sk, 0); + + return 1; +} + +void tcp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock*)data; + + if(!sk->zapped && + sk->tp_pinfo.af_tcp.delayed_acks && + sk->state != TCP_CLOSE) { + /* If socket is currently locked, defer the ACK. */ + if (!atomic_read(&sk->sock_readers)) + tcp_send_ack(sk); + else + tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10); + } +} + +void tcp_probe_timer(unsigned long data) +{ + struct sock *sk = (struct sock*)data; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if(sk->zapped) + return; + + if (atomic_read(&sk->sock_readers)) { + /* Try again later. */ + tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ/5); + return; + } + + /* *WARNING* RFC 1122 forbids this + * It doesn't AFAIK, because we kill the retransmit timer -AK + * FIXME: We ought not to do it, Solaris 2.5 actually has fixing + * this behaviour in Solaris down as a bug fix. [AC] + */ + if (tp->probes_out > sysctl_tcp_retries2) { + if(sk->err_soft) + sk->err = sk->err_soft; + else + sk->err = ETIMEDOUT; + sk->error_report(sk); + + if ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) { + /* Time wait the socket. */ + tcp_time_wait(sk); + } else { + /* Clean up time. */ + tcp_set_state(sk, TCP_CLOSE); + } + } else { + /* Only send another probe if we didn't close things up. 
*/ + tcp_send_probe0(sk); + } +} + +static __inline__ int tcp_keepopen_proc(struct sock *sk) +{ + int res = 0; + + if ((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; + + if (elapsed >= sysctl_tcp_keepalive_time) { + if (tp->probes_out > sysctl_tcp_keepalive_probes) { + if(sk->err_soft) + sk->err = sk->err_soft; + else + sk->err = ETIMEDOUT; + + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + sk->state_change(sk); + } else { + tp->probes_out++; + tp->pending = TIME_KEEPOPEN; + tcp_write_wakeup(sk); + res = 1; + } + } + } + return res; +} + +/* Kill off TIME_WAIT sockets once their lifetime has expired. */ +int tcp_tw_death_row_slot = 0; +static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] = + { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; + +extern void tcp_timewait_kill(struct tcp_tw_bucket *tw); + +static void tcp_twkill(unsigned long data) +{ + struct tcp_tw_bucket *tw; + int killed = 0; + + tw = tcp_tw_death_row[tcp_tw_death_row_slot]; + tcp_tw_death_row[tcp_tw_death_row_slot] = NULL; + while(tw != NULL) { + struct tcp_tw_bucket *next = tw->next_death; + + tcp_timewait_kill(tw); + killed++; + tw = next; + } + if(killed != 0) { + struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data; + atomic_sub(killed, &slt->count); + } + tcp_tw_death_row_slot = + ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); +} + +/* These are always called from BH context. See callers in + * tcp_input.c to verify this. + */ +void tcp_tw_schedule(struct tcp_tw_bucket *tw) +{ + int slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1); + struct tcp_tw_bucket **tpp = &tcp_tw_death_row[slot]; + + if((tw->next_death = *tpp) != NULL) + (*tpp)->pprev_death = &tw->next_death; + *tpp = tw; + tw->pprev_death = tpp; + + tw->death_slot = slot; + + tcp_inc_slow_timer(TCP_SLT_TWKILL); +} + +/* Happens rarely if at all, no care about scalability here. */ +void tcp_tw_reschedule(struct tcp_tw_bucket *tw) +{ + struct tcp_tw_bucket **tpp; + int slot; + + if(tw->next_death) + tw->next_death->pprev_death = tw->pprev_death; + *tw->pprev_death = tw->next_death; + tw->pprev_death = NULL; + + slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1); + tpp = &tcp_tw_death_row[slot]; + if((tw->next_death = *tpp) != NULL) + (*tpp)->pprev_death = &tw->next_death; + *tpp = tw; + tw->pprev_death = tpp; + + tw->death_slot = slot; + /* Timer was incremented when we first entered the table. */ +} + +/* This is for handling early-kills of TIME_WAIT sockets. */ +void tcp_tw_deschedule(struct tcp_tw_bucket *tw) +{ + if(tw->next_death) + tw->next_death->pprev_death = tw->pprev_death; + *tw->pprev_death = tw->next_death; + tw->pprev_death = NULL; + tcp_dec_slow_timer(TCP_SLT_TWKILL); +} + +/* + * Check all sockets for keepalive timer + * Called every 75 seconds + * This timer is started by af_inet init routine and is constantly + * running. + * + * It might be better to maintain a count of sockets that need it using + * setsockopt/tcp_destroy_sk and only set the timer when needed. + */ + +/* + * don't send over 5 keepopens at a time to avoid burstiness + * on big servers [AC] + */ +#define MAX_KA_PROBES 5 + +int sysctl_tcp_max_ka_probes = MAX_KA_PROBES; + +/* Keepopen's are only valid for "established" TCP's, nicely our listener + * hash gets rid of most of the useless testing, so we run through a couple + * of the established hash chains each clock tick. 
-DaveM + * + * And now, even more magic... TIME_WAIT TCP's cannot have keepalive probes + * going off for them, so we only need check the first half of the established + * hash table, even less testing under heavy load. + * + * I _really_ would rather do this by adding a new timer_struct to struct sock, + * and this way only those who set the keepalive option will get the overhead. + * The idea is you set it for 2 hours when the sock is first connected, when it + * does fire off (if at all, most sockets die earlier) you check for the keepalive + * option and also if the sock has been idle long enough to start probing. + */ +static void tcp_keepalive(unsigned long data) +{ + static int chain_start = 0; + int count = 0; + int i; + + for(i = chain_start; i < (chain_start + ((TCP_HTABLE_SIZE/2) >> 2)); i++) { + struct sock *sk = tcp_established_hash[i]; + while(sk) { + if(!atomic_read(&sk->sock_readers) && sk->keepopen) { + count += tcp_keepopen_proc(sk); + if(count == sysctl_tcp_max_ka_probes) + goto out; + } + sk = sk->next; + } + } +out: + chain_start = ((chain_start + ((TCP_HTABLE_SIZE/2)>>2)) & + ((TCP_HTABLE_SIZE/2) - 1)); +} + +/* + * The TCP retransmit timer. This lacks a few small details. + * + * 1. An initial rtt timeout on the probe0 should cause what we can + * of the first write queue buffer to be split and sent. + * 2. On a 'major timeout' as defined by RFC1122 we shouldn't report + * ETIMEDOUT if we know an additional 'soft' error caused this. + * tcp_err should save a 'soft error' for us. + * [Unless someone has broken it then it does, except for one 2.0 + * broken case of a send when the route/device is directly unreachable, + * and we error but should retry! - FIXME] [AC] + */ + +void tcp_retransmit_timer(unsigned long data) +{ + struct sock *sk = (struct sock*)data; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + /* We are reset. We will send no more retransmits. */ + if(sk->zapped) { + tcp_clear_xmit_timer(sk, TIME_RETRANS); + return; + } + + if (atomic_read(&sk->sock_readers)) { + /* Try again later */ + tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ/20); + return; + } + + /* Clear delay ack timer. */ + tcp_clear_xmit_timer(sk, TIME_DACK); + + /* RFC 2018, clear all 'sacked' flags in retransmission queue, + * the sender may have dropped out of order frames and we must + * send them out should this timer fire on us. + */ + if(tp->sack_ok) { + struct sk_buff *skb = skb_peek(&sk->write_queue); + + while((skb != NULL) && + (skb != tp->send_head) && + (skb != (struct sk_buff *)&sk->write_queue)) { + TCP_SKB_CB(skb)->sacked &= + ~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS); + skb = skb->next; + } + } + + /* Retransmission. */ + tp->retrans_head = NULL; + tp->rexmt_done = 0; + tp->fackets_out = 0; + tp->retrans_out = 0; + if (tp->retransmits == 0) { + /* Remember window where we lost: + * "one half of the current window but at least 2 segments" + * + * Here "current window" means the effective one, which + * means it must be an accurate representation of our current + * sending rate _and_ the snd_wnd. + */ + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd = 1; + } + + tp->retransmits++; + + tp->dup_acks = 0; + tp->high_seq = tp->snd_nxt; + tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); + + /* Increase the timeout each time we retransmit. Note that + * we do not increase the rtt estimate. rto is initialized + * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests + * that doubling rto each time is the least we can get away with. 
+ * In KA9Q, Karn uses this for the first few times, and then + * goes to quadratic. netBSD doubles, but only goes up to *64, + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is + * defined in the protocol as the maximum possible RTT. I guess + * we'll have to use something other than TCP to talk to the + * University of Mars. + * + * PAWS allows us longer timeouts and large windows, so once + * implemented ftp to mars will work nicely. We will have to fix + * the 120 second clamps though! + */ + tp->backoff++; + tp->rto = min(tp->rto << 1, 120*HZ); + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + + tcp_write_timeout(sk); +} + +/* + * Slow timer for SYN-RECV sockets + */ + +/* This now scales very nicely. -DaveM */ +static void tcp_syn_recv_timer(unsigned long data) +{ + struct sock *sk; + unsigned long now = jiffies; + int i; + + for(i = 0; i < TCP_LHTABLE_SIZE; i++) { + sk = tcp_listening_hash[i]; + + while(sk) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + /* TCP_LISTEN is implied. */ + if (!atomic_read(&sk->sock_readers) && tp->syn_wait_queue) { + struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue); + struct open_request *req = tp->syn_wait_queue; + do { + struct open_request *conn; + + conn = req; + req = req->dl_next; + + if (conn->sk || + ((long)(now - conn->expires)) <= 0) { + prev = conn; + continue; + } + + tcp_synq_unlink(tp, conn, prev); + if (conn->retrans >= sysctl_tcp_retries1) { +#ifdef TCP_DEBUG + printk(KERN_DEBUG "syn_recv: " + "too many retransmits\n"); +#endif + (*conn->class->destructor)(conn); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + tp->syn_backlog--; + tcp_openreq_free(conn); + + if (!tp->syn_wait_queue) + break; + } else { + unsigned long timeo; + struct open_request *op; + + (*conn->class->rtx_syn_ack)(sk, conn); + + conn->retrans++; +#ifdef TCP_DEBUG + printk(KERN_DEBUG "syn_ack rtx %d\n", + conn->retrans); +#endif + timeo = min((TCP_TIMEOUT_INIT + << conn->retrans), + 120*HZ); + conn->expires = now + timeo; + op = prev->dl_next; + tcp_synq_queue(tp, conn); + if (op != prev->dl_next) + prev = prev->dl_next; + } + /* old prev still valid here */ + } while (req); + } + sk = sk->next; + } + } +} + +void tcp_sltimer_handler(unsigned long data) +{ + struct tcp_sl_timer *slt = tcp_slt_array; + unsigned long next = ~0UL; + unsigned long now = jiffies; + int i; + + for (i=0; i < TCP_SLT_MAX; i++, slt++) { + if (atomic_read(&slt->count)) { + long trigger; + + trigger = slt->period - ((long)(now - slt->last)); + + if (trigger <= 0) { + (*slt->handler)((unsigned long) slt); + slt->last = now; + trigger = slt->period; + } + + /* Only reschedule if some events remain. */ + if (atomic_read(&slt->count)) + next = min(next, trigger); + } + } + if (next != ~0UL) + mod_timer(&tcp_slow_timer, (now + next)); +} + +void __tcp_inc_slow_timer(struct tcp_sl_timer *slt) +{ + unsigned long now = jiffies; + unsigned long when; + + slt->last = now; + + when = now + slt->period; + + if (tcp_slow_timer.prev) { + if ((long)(tcp_slow_timer.expires - when) >= 0) + mod_timer(&tcp_slow_timer, when); + } else { + tcp_slow_timer.expires = when; + add_timer(&tcp_slow_timer); + } +} diff --git a/pfinet/linux-src/net/ipv4/timer.c b/pfinet/linux-src/net/ipv4/timer.c new file mode 100644 index 00000000..3821a7c4 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/timer.c @@ -0,0 +1,127 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * TIMER - implementation of software timers for IP. + * + * Version: $Id: timer.c,v 1.15 1999/02/22 13:54:29 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> + * Florian La Roche, <flla@stud.uni-sb.de> + * + * Fixes: + * Alan Cox : To avoid destroying a wait queue as we use it + * we defer destruction until the destroy timer goes + * off. + * Alan Cox : Destroy socket doesn't write a status value to the + * socket buffer _AFTER_ freeing it! Also sock ensures + * the socket will get removed BEFORE this is called + * otherwise if the timer TIME_DESTROY occurs inside + * of inet_bh() with this socket being handled it goes + * BOOM! Have to stop timer going off if net_bh is + * active or the destroy causes crashes. + * Alan Cox : Cleaned up unused code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <linux/interrupt.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/arp.h> + +void net_delete_timer (struct sock *t) +{ + if(t->timer.prev) + del_timer (&t->timer); + t->timeout = 0; +} + +void net_reset_timer (struct sock *t, int timeout, unsigned long len) +{ + t->timeout = timeout; + mod_timer(&t->timer, jiffies+len); +} + +/* Now we will only be called whenever we need to do + * something, but we must be sure to process all of the + * sockets that need it. + */ +void net_timer (unsigned long data) +{ + struct sock *sk = (struct sock*)data; + int why = sk->timeout; + + /* Only process if socket is not in use. */ + if (atomic_read(&sk->sock_readers)) { + /* Try again later. */ + mod_timer(&sk->timer, jiffies+HZ/20); + return; + } + + /* Always see if we need to send an ack. */ + if (sk->tp_pinfo.af_tcp.delayed_acks && !sk->zapped) { + sk->prot->read_wakeup (sk); + if (!sk->dead) + sk->data_ready(sk,0); + } + + /* Now we need to figure out why the socket was on the timer. */ + switch (why) { + case TIME_DONE: + /* If the socket hasn't been closed off, re-try a bit later. */ + if (!sk->dead) { + net_reset_timer(sk, TIME_DONE, TCP_DONE_TIME); + break; + } + + if (sk->state != TCP_CLOSE) { + printk (KERN_DEBUG "non CLOSE socket in time_done\n"); + break; + } + destroy_sock (sk); + break; + + case TIME_DESTROY: + /* We've waited for a while for all the memory associated with + * the socket to be freed. + */ + destroy_sock(sk); + break; + + case TIME_CLOSE: + /* We've waited long enough, close the socket. */ + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + sk->state_change(sk); + net_reset_timer (sk, TIME_DONE, TCP_DONE_TIME); + break; + + default: + /* I want to see these... 
*/ + printk ("net_timer: timer expired - reason %d is unknown\n", why); + break; + } +} + diff --git a/pfinet/linux-src/net/ipv4/udp.c b/pfinet/linux-src/net/ipv4/udp.c new file mode 100644 index 00000000..909e858f --- /dev/null +++ b/pfinet/linux-src/net/ipv4/udp.c @@ -0,0 +1,1191 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The User Datagram Protocol (UDP). + * + * Version: $Id: udp.c,v 1.66.2.3 1999/08/07 10:56:36 davem Exp $ + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Alan Cox, <Alan.Cox@linux.org> + * + * Fixes: + * Alan Cox : verify_area() calls + * Alan Cox : stopped close while in use off icmp + * messages. Not a fix but a botch that + * for udp at least is 'valid'. + * Alan Cox : Fixed icmp handling properly + * Alan Cox : Correct error for oversized datagrams + * Alan Cox : Tidied select() semantics. + * Alan Cox : udp_err() fixed properly, also now + * select and read wake correctly on errors + * Alan Cox : udp_send verify_area moved to avoid mem leak + * Alan Cox : UDP can count its memory + * Alan Cox : send to an unknown connection causes + * an ECONNREFUSED off the icmp, but + * does NOT close. + * Alan Cox : Switched to new sk_buff handlers. No more backlog! + * Alan Cox : Using generic datagram code. Even smaller and the PEEK + * bug no longer crashes it. + * Fred Van Kempen : Net2e support for sk->broadcast. + * Alan Cox : Uses skb_free_datagram + * Alan Cox : Added get/set sockopt support. + * Alan Cox : Broadcasting without option set returns EACCES. + * Alan Cox : No wakeup calls. Instead we now use the callbacks. + * Alan Cox : Use ip_tos and ip_ttl + * Alan Cox : SNMP Mibs + * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. + * Matt Dillon : UDP length checks. + * Alan Cox : Smarter af_inet used properly. + * Alan Cox : Use new kernel side addressing. + * Alan Cox : Incorrect return on truncated datagram receive. + * Arnt Gulbrandsen : New udp_send and stuff + * Alan Cox : Cache last socket + * Alan Cox : Route cache + * Jon Peatfield : Minor efficiency fix to sendto(). + * Mike Shaver : RFC1122 checks. + * Alan Cox : Nonblocking error fix. + * Willy Konynenberg : Transparent proxying support. + * Mike McLagan : Routing by source + * David S. Miller : New socket lookup architecture. + * Last socket cache retained as it + * does have a high hit rate. + * Olaf Kirch : Don't linearise iovec on sendmsg. + * Andi Kleen : Some cleanups, cache destination entry + * for connect. + * Vitaly E. Lavrov : Transparent proxy revived after year coma. + * Melvin Smith : Check msg_name not msg_namelen in sendto(), + * return ENOTCONN for unconnected sockets (POSIX) + * Janos Farkas : don't deliver multi/broadcasts to a different + * bound-to-device socket + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* RFC1122 Status: + 4.1.3.1 (Ports): + SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to + an un-listened port. 
(OK) + 4.1.3.2 (IP Options) + MUST pass IP options from IP -> application (OK) + MUST allow application to specify IP options (OK) + 4.1.3.3 (ICMP Messages) + MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set) + 4.1.3.4 (UDP Checksums) + MUST provide facility for checksumming (OK) + MAY allow application to control checksumming (OK) + MUST default to checksumming on (OK) + MUST discard silently datagrams with bad csums (OK, except during debugging) + 4.1.3.5 (UDP Multihoming) + MUST allow application to specify source address (OK) + SHOULD be able to communicate the chosen src addr up to application + when application doesn't choose (DOES - use recvmsg cmsgs) + 4.1.3.6 (Invalid Addresses) + MUST discard invalid source addresses (OK -- done in the new routing code) + MUST only send datagrams with one of our addresses (OK) +*/ + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/config.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <net/snmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/route.h> +#include <net/checksum.h> + +/* + * Snmp MIB for the UDP layer + */ + +struct udp_mib udp_statistics; + +struct sock *udp_hash[UDP_HTABLE_SIZE]; + +/* Shared by v4/v6 udp. */ +int udp_port_rover = 0; + +static int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + SOCKHASH_LOCK(); + if (snum == 0) { + int best_size_so_far, best, result, i; + + if (udp_port_rover > sysctl_local_port_range[1] || + udp_port_rover < sysctl_local_port_range[0]) + udp_port_rover = sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = udp_port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct sock *sk; + int size; + + sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; + } + size = 0; + do { + if (++size >= best_size_so_far) + goto next; + } while ((sk = sk->next) != NULL); + best_size_so_far = size; + best = result; + next: + } + result = best; + for(;; result += UDP_HTABLE_SIZE) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } +gotit: + udp_port_rover = snum = result; + } else { + struct sock *sk2; + + for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk2 != NULL; + sk2 = sk2->next) { + if (sk2->num == snum && + sk2 != sk && + sk2->bound_dev_if == sk->bound_dev_if && + (!sk2->rcv_saddr || + !sk->rcv_saddr || + sk2->rcv_saddr == sk->rcv_saddr) && + (!sk2->reuse || !sk->reuse)) + goto fail; + } + } + sk->num = snum; + SOCKHASH_UNLOCK(); + return 0; + +fail: + SOCKHASH_UNLOCK(); + return 1; +} + +/* Last hit UDP socket cache, this is ipv4 specific so make it static. 
*/ +static u32 uh_cache_saddr, uh_cache_daddr; +static u16 uh_cache_dport, uh_cache_sport; +static struct sock *uh_cache_sk = NULL; + +static void udp_v4_hash(struct sock *sk) +{ + struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; + + SOCKHASH_LOCK(); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + +static void udp_v4_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + } + SOCKHASH_UNLOCK(); +} + +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this here plus the last hit cache. -DaveM + */ +struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +{ + struct sock *sk, *result = NULL; + unsigned short hnum = ntohs(dport); + int badness = -1; + + for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { + if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) { + int score = 0; + if(sk->rcv_saddr) { + if(sk->rcv_saddr != daddr) + continue; + score++; + } + if(sk->daddr) { + if(sk->daddr != saddr) + continue; + score++; + } + if(sk->dport) { + if(sk->dport != sport) + continue; + score++; + } + if(sk->bound_dev_if) { + if(sk->bound_dev_if != dif) + continue; + score++; + } + if(score == 4) { + result = sk; + break; + } else if(score > badness) { + result = sk; + badness = score; + } + } + } + return result; +} + +__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +{ + struct sock *sk; + + if(!dif && uh_cache_sk && + uh_cache_saddr == saddr && + uh_cache_sport == sport && + uh_cache_dport == dport && + uh_cache_daddr == daddr) + return uh_cache_sk; + + sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); + if(!dif) { + uh_cache_sk = sk; + uh_cache_saddr = saddr; + uh_cache_daddr = daddr; + uh_cache_sport = sport; + uh_cache_dport = dport; + } + return sk; +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +#define secondlist(hpnum, sk, fpass) \ +({ struct sock *s1; if(!(sk) && (fpass)--) \ + s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \ + else \ + s1 = (sk); \ + s1; \ +}) + +#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \ + secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass)) + +#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \ + secondlist((hpnum),(sk)->next,(fpass)) + +static struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr, + unsigned short rnum, unsigned long laddr, + struct device *dev, unsigned short pnum, + int dif) +{ + struct sock *s, *result = NULL; + int badness = -1; + u32 paddr = 0; + unsigned short hnum = ntohs(num); + unsigned short hpnum = ntohs(pnum); + int firstpass = 1; + + if(dev && dev->ip_ptr) { + struct in_device *idev = dev->ip_ptr; + + if(idev->ifa_list) + paddr = idev->ifa_list->ifa_local; + } + + SOCKHASH_LOCK(); + for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass); + s != NULL; + s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) { + if(s->num == hnum || s->num == hpnum) { + int score = 0; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->rcv_saddr) { + if((s->num != hpnum || s->rcv_saddr != paddr) && + (s->num != hnum || s->rcv_saddr != laddr)) + continue; + score++; + } + if(s->daddr) { + if(s->daddr != raddr) + continue; + score++; + } + if(s->dport) { + if(s->dport != rnum) + continue; + score++; + } + 
if(s->bound_dev_if) { + if(s->bound_dev_if != dif) + continue; + score++; + } + if(score == 4 && s->num == hnum) { + result = s; + break; + } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) { + result = s; + badness = score; + } + } + } + SOCKHASH_UNLOCK(); + return result; +} + +#undef secondlist +#undef udp_v4_proxy_loop_init +#undef udp_v4_proxy_loop_next + +#endif + +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned short rnum, + unsigned long laddr, + int dif) +{ + struct sock *s = sk; + unsigned short hnum = ntohs(num); + for(; s; s = s->next) { + if ((s->num != hnum) || + (s->dead && (s->state == TCP_CLOSE)) || + (s->daddr && s->daddr!=raddr) || + (s->dport != rnum && s->dport != 0) || + (s->rcv_saddr && s->rcv_saddr != laddr) || + (s->bound_dev_if && s->bound_dev_if != dif)) + continue; + break; + } + return s; +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. + * Header points to the ip header of the error packet. We move + * on past this. Then (as it used to claim before adjustment) + * header points to the first 8 bytes of the udp header. We need + * to find the appropriate port. + */ + +void udp_err(struct sk_buff *skb, unsigned char *dp, int len) +{ + struct iphdr *iph = (struct iphdr*)dp; + struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2)); + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct sock *sk; + int harderr; + u32 info; + int err; + + if (len < (iph->ihl<<2)+sizeof(struct udphdr)) { + icmp_statistics.IcmpInErrors++; + return; + } + + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + if (sk == NULL) { + icmp_statistics.IcmpInErrors++; + return; /* No socket for error */ + } + + err = 0; + info = 0; + harderr = 0; + + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + return; + case ICMP_PARAMETERPROB: + err = EPROTO; + info = ntohl(skb->h.icmph->un.gateway)>>24; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + info = ntohs(skb->h.icmph->un.frag.mtu); + harderr = 1; + break; + } + return; + } + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + } + + /* + * Various people wanted BSD UDP semantics. Well they've come + * back out because they slow down response to stuff like dead + * or unreachable name servers and they screw term users something + * chronic. Oh and it violates RFC1122. So basically fix your + * client code people. + */ + + /* + * RFC1122: OK. Passes ICMP errors back to application, as per + * 4.1.3.3. After the comment above, that should be no surprise. + */ + + if (!harderr && !sk->ip_recverr) + return; + + /* + * 4.x BSD compatibility item. Break RFC1122 to + * get BSD socket semantics. 
+ */ + if(sk->bsdism && sk->state!=TCP_ESTABLISHED) + return; + + if (sk->ip_recverr) + ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); + sk->err = err; + sk->error_report(sk); +} + + +static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base) +{ + return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); +} + +struct udpfakehdr +{ + struct udphdr uh; + u32 saddr; + u32 daddr; + struct iovec *iov; + u32 wcheck; +}; + +/* + * Copy and checksum a UDP packet from user space into a buffer. We still have + * to do the planning to get ip_build_xmit to spot direct transfer to network + * card and provide an additional callback mode for direct user->board I/O + * transfers. That one will be fun. + */ + +static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) +{ + struct udpfakehdr *ufh = (struct udpfakehdr *)p; + if (offset==0) { + if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset, + fraglen-sizeof(struct udphdr), &ufh->wcheck)) + return -EFAULT; + ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), + ufh->wcheck); + ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, + ntohs(ufh->uh.len), + IPPROTO_UDP, ufh->wcheck); + if (ufh->uh.check == 0) + ufh->uh.check = -1; + memcpy(to, ufh, sizeof(struct udphdr)); + return 0; + } + if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr), + fraglen, &ufh->wcheck)) + return -EFAULT; + return 0; +} + +/* + * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing + * that we use two routines for this for speed. Probably we ought to have a + * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding. + * Timing needed to verify if this is a valid decision. + */ + +static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) +{ + struct udpfakehdr *ufh = (struct udpfakehdr *)p; + + if (offset==0) { + memcpy(to, ufh, sizeof(struct udphdr)); + return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset, + fraglen-sizeof(struct udphdr)); + } + return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr), + fraglen); +} + +int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) +{ + int ulen = len + sizeof(struct udphdr); + struct ipcm_cookie ipc; + struct udpfakehdr ufh; + struct rtable *rt = NULL; + int free = 0; + int connected = 0; + u32 daddr; + u8 tos; + int err; + + /* This check is ONLY to check for arithmetic overflow + on integer(!) len. Not more! Real check will be made + in ip_build_xmit --ANK + + BTW socket.c -> af_*.c -> ... make multiple + invalid conversions size_t -> int. We MUST repair it f.e. + by replacing all of them with size_t and revise all + the places sort of len += sizeof(struct iphdr) + If len was ULONG_MAX-10 it would be cathastrophe --ANK + */ + + if (len < 0 || len > 0xFFFF) + return -EMSGSIZE; + + /* + * Check the flags. + */ + + if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_PROXY|MSG_NOSIGNAL)) + return -EINVAL; + if ((msg->msg_flags&MSG_PROXY) && !capable(CAP_NET_ADMIN)) + return -EPERM; +#else + if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL)) + return -EINVAL; +#endif + + /* + * Get and verify the address. 
+ */ + + if (msg->msg_name) { + struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) + return(-EINVAL); + if (usin->sin_family != AF_INET) { + static int complained; + if (!complained++) + printk(KERN_WARNING "%s forgot to set AF_INET in udp sendmsg. Fix it!\n", current->comm); + if (usin->sin_family) + return -EINVAL; + } + ufh.daddr = usin->sin_addr.s_addr; + ufh.uh.dest = usin->sin_port; + if (ufh.uh.dest == 0) + return -EINVAL; + } else { + if (sk->state != TCP_ESTABLISHED) + return -ENOTCONN; + ufh.daddr = sk->daddr; + ufh.uh.dest = sk->dport; + /* Open fast path for connected socket. + Route will not be used, if at least one option is set. + */ + connected = 1; + } +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (msg->msg_flags&MSG_PROXY) { + /* + * We map the first 8 bytes of a second sockaddr_in + * into the last 8 (unused) bytes of a sockaddr_in. + */ + struct sockaddr_in *from = (struct sockaddr_in *)msg->msg_name; + from = (struct sockaddr_in *)&from->sin_zero; + if (from->sin_family != AF_INET) + return -EINVAL; + ipc.addr = from->sin_addr.s_addr; + ufh.uh.source = from->sin_port; + if (ipc.addr == 0) + ipc.addr = sk->saddr; + connected = 0; + } else +#endif + { + ipc.addr = sk->saddr; + ufh.uh.source = sk->sport; + } + + ipc.opt = NULL; + ipc.oif = sk->bound_dev_if; + if (msg->msg_controllen) { + err = ip_cmsg_send(msg, &ipc); + if (err) + return err; + if (ipc.opt) + free = 1; + connected = 0; + } + if (!ipc.opt) + ipc.opt = sk->opt; + + ufh.saddr = ipc.addr; + ipc.addr = daddr = ufh.daddr; + + if (ipc.opt && ipc.opt->srr) { + if (!daddr) + return -EINVAL; + daddr = ipc.opt->faddr; + connected = 0; + } + tos = RT_TOS(sk->ip_tos); + if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) || + (ipc.opt && ipc.opt->is_strictroute)) { + tos |= RTO_ONLINK; + connected = 0; + } + + if (MULTICAST(daddr)) { + if (!ipc.oif) + ipc.oif = sk->ip_mc_index; + if (!ufh.saddr) + ufh.saddr = sk->ip_mc_addr; + connected = 0; + } + + if (connected && sk->dst_cache) { + rt = (struct rtable*)sk->dst_cache; + if (rt->u.dst.obsolete) { + sk->dst_cache = NULL; + dst_release(&rt->u.dst); + rt = NULL; + } else + dst_clone(&rt->u.dst); + } + + if (rt == NULL) { + err = ip_route_output(&rt, daddr, ufh.saddr, +#ifdef CONFIG_IP_TRANSPARENT_PROXY + (msg->msg_flags&MSG_PROXY ? RTO_TPROXY : 0) | +#endif + tos, ipc.oif); + if (err) + goto out; + + err = -EACCES; + if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) + goto out; + if (connected && sk->dst_cache == NULL) + sk->dst_cache = dst_clone(&rt->u.dst); + } + + ufh.saddr = rt->rt_src; + if (!ipc.addr) + ufh.daddr = ipc.addr = rt->rt_dst; + ufh.uh.len = htons(ulen); + ufh.uh.check = 0; + ufh.iov = msg->msg_iov; + ufh.wcheck = 0; + + /* RFC1122: OK. Provides the checksumming facility (MUST) as per */ + /* 4.1.3.4. It's configurable by the application via setsockopt() */ + /* (MAY) and it defaults to on (MUST). */ + + err = ip_build_xmit(sk,sk->no_check ? udp_getfrag_nosum : udp_getfrag, + &ufh, ulen, &ipc, rt, msg->msg_flags); + +out: + ip_rt_put(rt); + if (free) + kfree(ipc.opt); + if (!err) { + udp_statistics.UdpOutDatagrams++; + return len; + } + return err; +} + +/* + * IOCTL requests applicable to the UDP protocol + */ + +int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) + { + case TIOCOUTQ: + { + unsigned long amount; + + amount = sock_wspace(sk); + return put_user(amount, (int *)arg); + } + + case TIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + /* N.B. 
Is this interrupt safe?? + -> Yes. Interrupts do not remove skbs. --ANK (980725) + */ + skb = skb_peek(&sk->receive_queue); + if (skb != NULL) { + /* + * We will only return the amount + * of this packet since that is all + * that will be read. + */ + amount = skb->len - sizeof(struct udphdr); + } + return put_user(amount, (int *)arg); + } + + default: + return(-ENOIOCTLCMD); + } + return(0); +} + +#ifndef HAVE_CSUM_COPY_USER +#undef CONFIG_UDP_DELAY_CSUM +#endif + +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags, int *addr_len) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + struct sk_buff *skb; + int copied, err; + + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + + /* + * From here the generic datagram does a lot of the work. Come + * the finished NET3, it will do _ALL_ the work! + */ + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len - sizeof(struct udphdr); + if (copied > len) { + copied = len; + msg->msg_flags |= MSG_TRUNC; + } + +#ifndef CONFIG_UDP_DELAY_CSUM + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, + copied); +#else + if (skb->ip_summed==CHECKSUM_UNNECESSARY) { + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, + copied); + } else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) { + if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) + goto csum_copy_err; + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, + copied); + } else { + unsigned int csum; + + err = 0; + csum = csum_partial(skb->h.raw, sizeof(struct udphdr), skb->csum); + csum = csum_and_copy_to_user((char*)&skb->h.uh[1], msg->msg_iov[0].iov_base, + copied, csum, &err); + if (err) + goto out_free; + if ((unsigned short)csum_fold(csum)) + goto csum_copy_err; + } +#endif + if (err) + goto out_free; + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (sin) + { + /* + * Check any passed addresses + */ + if (addr_len) + *addr_len=sizeof(*sin); + + sin->sin_family = AF_INET; + sin->sin_port = skb->h.uh->source; + sin->sin_addr.s_addr = skb->nh.iph->saddr; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (flags&MSG_PROXY) + { + /* + * We map the first 8 bytes of a second sockaddr_in + * into the last 8 (unused) bytes of a sockaddr_in. + * This _is_ ugly, but it's the only way to do it + * easily, without adding system calls. + */ + struct sockaddr_in *sinto = + (struct sockaddr_in *) sin->sin_zero; + + sinto->sin_family = AF_INET; + sinto->sin_port = skb->h.uh->dest; + sinto->sin_addr.s_addr = skb->nh.iph->daddr; + } +#endif + } + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); + err = copied; + +out_free: + skb_free_datagram(sk, skb); +out: + return err; + +#ifdef CONFIG_UDP_DELAY_CSUM +csum_copy_err: + udp_statistics.UdpInErrors++; + skb_free_datagram(sk, skb); + + /* + * Error for blocking case is chosen to masquerade + * as some normal condition. + */ + return (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; +#endif +} + +int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + struct rtable *rt; + int err; + + + if (addr_len < sizeof(*usin)) + return(-EINVAL); + + /* + * 1003.1g - break association. 
+ */ + + if (usin->sin_family==AF_UNSPEC) + { + sk->saddr=INADDR_ANY; + sk->rcv_saddr=INADDR_ANY; + sk->daddr=INADDR_ANY; + sk->state = TCP_CLOSE; + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + return 0; + } + + if (usin->sin_family && usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + + dst_release(xchg(&sk->dst_cache, NULL)); + + err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr, + sk->ip_tos|sk->localroute, sk->bound_dev_if); + if (err) + return err; + if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) { + ip_rt_put(rt); + return -EACCES; + } + if(!sk->saddr) + sk->saddr = rt->rt_src; /* Update source address */ + if(!sk->rcv_saddr) + sk->rcv_saddr = rt->rt_src; + sk->daddr = rt->rt_dst; + sk->dport = usin->sin_port; + sk->state = TCP_ESTABLISHED; + + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + + sk->dst_cache = &rt->u.dst; + return(0); +} + + +static void udp_close(struct sock *sk, long timeout) +{ + /* See for explanation: raw_close in ipv4/raw.c */ + sk->state = TCP_CLOSE; + udp_v4_unhash(sk); + sk->dead = 1; + destroy_sock(sk); +} + +static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + /* + * Charge it to the socket, dropping if the queue is full. + */ + +#if defined(CONFIG_FILTER) && defined(CONFIG_UDP_DELAY_CSUM) + if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { + if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) { + udp_statistics.UdpInErrors++; + ip_statistics.IpInDiscards++; + ip_statistics.IpInDelivers--; + kfree_skb(skb); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +#endif + + if (sock_queue_rcv_skb(sk,skb)<0) { + udp_statistics.UdpInErrors++; + ip_statistics.IpInDiscards++; + ip_statistics.IpInDelivers--; + kfree_skb(skb); + return -1; + } + udp_statistics.UdpInDatagrams++; + return 0; +} + + +static inline void udp_deliver(struct sock *sk, struct sk_buff *skb) +{ + udp_queue_rcv_skb(sk, skb); +} + +/* + * Multicasts and broadcasts go to each listener. + * + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, + u32 saddr, u32 daddr) +{ + struct sock *sk; + int dif; + + sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif); + if (sk) { + struct sock *sknext = NULL; + + do { + struct sk_buff *skb1 = skb; + + sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr, + uh->source, daddr, dif); + if(sknext) + skb1 = skb_clone(skb, GFP_ATOMIC); + + if(skb1) + udp_deliver(sk, skb1); + sk = sknext; + } while(sknext); + } else + kfree_skb(skb); + return 0; +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* + * Check whether a received UDP packet might be for one of our + * sockets. + */ + +int udp_chkaddr(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4); + struct sock *sk; + + sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest, skb->dev->ifindex); + if (!sk) + return 0; + + /* 0 means accept all LOCAL addresses here, not all the world... */ + if (sk->rcv_saddr == 0) + return 0; + + return 1; +} +#endif + +/* + * All we need to do is get the socket, and then do a checksum. 
+ */ + +int udp_rcv(struct sk_buff *skb, unsigned short len) +{ + struct sock *sk; + struct udphdr *uh; + unsigned short ulen; + struct rtable *rt = (struct rtable*)skb->dst; + u32 saddr = skb->nh.iph->saddr; + u32 daddr = skb->nh.iph->daddr; + + /* + * First time through the loop.. Do all the setup stuff + * (including finding out the socket we go to etc) + */ + + /* + * Get the header. + */ + + uh = skb->h.uh; + __skb_pull(skb, skb->h.raw - skb->data); + + ip_statistics.IpInDelivers++; + + /* + * Validate the packet and the UDP length. + */ + + ulen = ntohs(uh->len); + + if (ulen > len || ulen < sizeof(*uh)) { + NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len)); + udp_statistics.UdpInErrors++; + kfree_skb(skb); + return(0); + } + skb_trim(skb, ulen); + +#ifndef CONFIG_UDP_DELAY_CSUM + if (uh->check && + (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,ulen,saddr,daddr,skb->csum)) || + ((skb->ip_summed==CHECKSUM_NONE) && + (udp_check(uh,ulen,saddr,daddr, csum_partial((char*)uh, ulen, 0)))))) + goto csum_error; +#else + if (uh->check==0) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else if (skb->ip_summed==CHECKSUM_HW) { + if (udp_check(uh,ulen,saddr,daddr,skb->csum)) + goto csum_error; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); +#endif + + if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return udp_v4_mcast_deliver(skb, uh, saddr, daddr); + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (IPCB(skb)->redirport) + sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source, + daddr, skb->dev, IPCB(skb)->redirport, + skb->dev->ifindex); + else +#endif + sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); + + if (sk == NULL) { +#ifdef CONFIG_UDP_DELAY_CSUM + if (skb->ip_summed != CHECKSUM_UNNECESSARY && + (unsigned short)csum_fold(csum_partial((char*)uh, ulen, skb->csum))) + goto csum_error; +#endif + udp_statistics.UdpNoPorts++; + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + /* + * Hmm. We got an UDP broadcast to a port to which we + * don't wanna listen. Ignore it. + */ + kfree_skb(skb); + return(0); + } + udp_deliver(sk, skb); + return 0; + +csum_error: + /* + * RFC1122: OK. Discards the bad packet silently (as far as + * the network is concerned, anyway) as per 4.1.3.4 (MUST). + */ + NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. 
From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen)); + udp_statistics.UdpInErrors++; + kfree_skb(skb); + return(0); +} + +struct proto udp_prot = { + (struct sock *)&udp_prot, /* sklist_next */ + (struct sock *)&udp_prot, /* sklist_prev */ + udp_close, /* close */ + udp_connect, /* connect */ + NULL, /* accept */ + NULL, /* retransmit */ + NULL, /* write_wakeup */ + NULL, /* read_wakeup */ + datagram_poll, /* poll */ + udp_ioctl, /* ioctl */ + NULL, /* init */ + NULL, /* destroy */ + NULL, /* shutdown */ + ip_setsockopt, /* setsockopt */ + ip_getsockopt, /* getsockopt */ + udp_sendmsg, /* sendmsg */ + udp_recvmsg, /* recvmsg */ + NULL, /* bind */ + udp_queue_rcv_skb, /* backlog_rcv */ + udp_v4_hash, /* hash */ + udp_v4_unhash, /* unhash */ + udp_v4_get_port, /* good_socknum */ + 128, /* max_header */ + 0, /* retransmits */ + "UDP", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ +}; diff --git a/pfinet/linux-src/net/ipv4/utils.c b/pfinet/linux-src/net/ipv4/utils.c new file mode 100644 index 00000000..ce74ade2 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/utils.c @@ -0,0 +1,91 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Various kernel-resident INET utility functions; mainly + * for format conversion and debugging output. + * + * Version: $Id: utils.c,v 1.6 1997/12/13 21:53:03 kuznet Exp $ + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : verify_area check. + * Alan Cox : removed old debugging. + * Andi Kleen : add net_ratelimit() + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/stat.h> +#include <stdarg.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/tcp.h> +#include <linux/skbuff.h> + + +/* + * Display an IP address in readable format. + */ + +char *in_ntoa(__u32 in) +{ + static char buff[18]; + char *p; + + p = (char *) ∈ + sprintf(buff, "%d.%d.%d.%d", + (p[0] & 255), (p[1] & 255), (p[2] & 255), (p[3] & 255)); + return(buff); +} + + +/* + * Convert an ASCII string to binary IP. + */ + +__u32 in_aton(const char *str) +{ + unsigned long l; + unsigned int val; + int i; + + l = 0; + for (i = 0; i < 4; i++) + { + l <<= 8; + if (*str != '\0') + { + val = 0; + while (*str != '\0' && *str != '.') + { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + return(htonl(l)); +} + |
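
Illustrative note (not part of the imported source above): a minimal sketch of how the two helpers at the end of utils.c are meant to be used, assuming only the in_aton()/in_ntoa() definitions and the headers already included in that file. The caller name below is hypothetical; note that in_ntoa() formats into a static buffer, so its result must be consumed before the next call overwrites it.

/* Hypothetical caller, for illustration only -- not part of this commit. */
static void example_addr_roundtrip(void)
{
	__u32 addr = in_aton("10.0.0.1");	/* returns the address in network byte order */

	/* in_ntoa() returns a pointer to a static buffer, so use the string
	 * before calling it again.
	 */
	printk(KERN_DEBUG "parsed %s (0x%08x in host order)\n",
	       in_ntoa(addr), ntohl(addr));
}
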