author    Roland McGrath <roland@gnu.org>    2000-02-04 03:21:18 +0000
committer Roland McGrath <roland@gnu.org>    2000-02-04 03:21:18 +0000
commit    9fd51e9b0ad33a89a83fdbbb66bd20d85f7893fb (patch)
tree      8845b79f170028cb4380045c50277bbf075b5b7d /pfinet/linux-src/net/core
Import of Linux 2.2.12 subset (ipv4 stack and related)
Diffstat (limited to 'pfinet/linux-src/net/core')
-rw-r--r--  pfinet/linux-src/net/core/Makefile             41
-rw-r--r--  pfinet/linux-src/net/core/datagram.c          249
-rw-r--r--  pfinet/linux-src/net/core/dev.c              2026
-rw-r--r--  pfinet/linux-src/net/core/dev_mcast.c         252
-rw-r--r--  pfinet/linux-src/net/core/dst.c               145
-rw-r--r--  pfinet/linux-src/net/core/filter.c            454
-rw-r--r--  pfinet/linux-src/net/core/firewall.c          160
-rw-r--r--  pfinet/linux-src/net/core/iovec.c             278
-rw-r--r--  pfinet/linux-src/net/core/neighbour.c        1394
-rw-r--r--  pfinet/linux-src/net/core/profile.c           305
-rw-r--r--  pfinet/linux-src/net/core/rtnetlink.c         512
-rw-r--r--  pfinet/linux-src/net/core/scm.c               280
-rw-r--r--  pfinet/linux-src/net/core/skbuff.c            385
-rw-r--r--  pfinet/linux-src/net/core/sock.c             1051
-rw-r--r--  pfinet/linux-src/net/core/sysctl_net_core.c    61
-rw-r--r--  pfinet/linux-src/net/core/utils.c              66
16 files changed, 7659 insertions, 0 deletions
diff --git a/pfinet/linux-src/net/core/Makefile b/pfinet/linux-src/net/core/Makefile
new file mode 100644
index 00000000..5df65cd2
--- /dev/null
+++ b/pfinet/linux-src/net/core/Makefile
@@ -0,0 +1,41 @@
+#
+# Makefile for the Linux networking core.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := core.o
+
+O_OBJS := sock.o skbuff.o iovec.o datagram.o scm.o
+
+ifeq ($(CONFIG_SYSCTL),y)
+ifeq ($(CONFIG_NET),y)
+O_OBJS += sysctl_net_core.o
+endif
+endif
+
+ifdef CONFIG_FILTER
+O_OBJS += filter.o
+endif
+
+ifdef CONFIG_NET
+
+O_OBJS += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o
+
+ifdef CONFIG_FIREWALL
+OX_OBJS += firewall.o
+endif
+
+endif
+
+ifdef CONFIG_NET_PROFILE
+OX_OBJS += profile.o
+endif
+
+include $(TOPDIR)/Rules.make
+
+tar:
+ tar -cvf /dev/f1 .
diff --git a/pfinet/linux-src/net/core/datagram.c b/pfinet/linux-src/net/core/datagram.c
new file mode 100644
index 00000000..9bb68fa4
--- /dev/null
+++ b/pfinet/linux-src/net/core/datagram.c
@@ -0,0 +1,249 @@
+/*
+ * SUCS NET3:
+ *
+ * Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top
+ * of these would make sense. Not tonight however 8-).
+ * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layers all have identical poll code and mostly
+ * identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it.
+ *
+ * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code)
+ *
+ * Fixes:
+ * Alan Cox : NULL return from skb_peek_copy() understood
+ * Alan Cox : Rewrote skb_read_datagram to avoid the skb_peek_copy stuff.
+ * Alan Cox : Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but
+ * AX.25 now works right, and SPX is feasible.
+ * Alan Cox : Fixed write poll of non IP protocol crash.
+ * Florian La Roche: Changed for my new skbuff handling.
+ * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
+ * Linus Torvalds : BSD semantic fixes.
+ * Alan Cox : Datagram iovec handling
+ * Darryl Miles : Fixed non-blocking SOCK_STREAM.
+ * Alan Cox : POSIXisms
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+
+/*
+ * Wait for a packet..
+ *
+ * Interrupts off so that no packet arrives before we begin sleeping.
+ * Otherwise we might miss our wake up
+ */
+
+static inline void wait_for_packet(struct sock * sk)
+{
+ struct wait_queue wait = { current, NULL };
+
+ add_wait_queue(sk->sleep, &wait);
+ current->state = TASK_INTERRUPTIBLE;
+
+ if (skb_peek(&sk->receive_queue) == NULL)
+ schedule();
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
+}
+
+/*
+ * Is a socket 'connection oriented' ?
+ */
+
+static inline int connection_based(struct sock *sk)
+{
+ return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
+}
+
+/*
+ * Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible
+ * races. This replaces identical code in packet, raw and udp, as well as in
+ * IPX, AX.25 and AppleTalk. It also finally fixes the long-standing peek and read
+ * race for datagram sockets. If you alter this routine remember it must be
+ * re-entrant.
+ *
+ * This function will lock the socket if a skb is returned, so the caller
+ * needs to unlock the socket in that case (usually by calling skb_free_datagram)
+ *
+ * * It does not lock socket since today. This function is
+ * * free of race conditions. This measure should/can improve
+ * * significantly datagram socket latencies at high loads,
+ * * when data copying to user space takes lots of time.
+ * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ * * 8) Great win.)
+ * * --ANK (980729)
+ *
+ * The order of the tests when we find no data waiting are specified
+ * quite explicitly by POSIX 1003.1g, don't change them without having
+ * the standard around please.
+ */
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err)
+{
+ int error;
+ struct sk_buff *skb;
+
+ /* Caller is allowed not to check sk->err before skb_recv_datagram() */
+ error = sock_error(sk);
+ if (error)
+ goto no_packet;
+
+restart:
+ while(skb_queue_empty(&sk->receive_queue)) /* No data */
+ {
+ /* Socket errors? */
+ error = sock_error(sk);
+ if (error)
+ goto no_packet;
+
+ /* Socket shut down? */
+ if (sk->shutdown & RCV_SHUTDOWN)
+ goto no_packet;
+
+ /* Sequenced packets can come disconnected. If so we report the problem */
+ error = -ENOTCONN;
+ if(connection_based(sk) && sk->state!=TCP_ESTABLISHED)
+ goto no_packet;
+
+ /* handle signals */
+ error = -ERESTARTSYS;
+ if (signal_pending(current))
+ goto no_packet;
+
+ /* User doesn't want to wait */
+ error = -EAGAIN;
+ if (noblock)
+ goto no_packet;
+
+ wait_for_packet(sk);
+ }
+
+ /* Again only user level code calls this function, so nothing interrupt level
+ will suddenly eat the receive_queue */
+ if (flags & MSG_PEEK)
+ {
+ unsigned long cpu_flags;
+
+ /* It is the only POTENTIAL race condition
+ in this function. skb may be stolen by
+ another receiver after peek, but before
+ incrementing use count, provided kernel
+ is reentrant (it is not) or this function
+ is called by interrupts.
+
+ Protect it with global skb spinlock,
+ though for now even this is overkill.
+ --ANK (980728)
+ */
+ spin_lock_irqsave(&skb_queue_lock, cpu_flags);
+ skb = skb_peek(&sk->receive_queue);
+ if(skb!=NULL)
+ atomic_inc(&skb->users);
+ spin_unlock_irqrestore(&skb_queue_lock, cpu_flags);
+ } else
+ skb = skb_dequeue(&sk->receive_queue);
+
+ if (!skb) /* Avoid race if someone beats us to the data */
+ goto restart;
+ return skb;
+
+no_packet:
+ *err = error;
+ return NULL;
+}
+
+void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+}
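
A minimal sketch (hypothetical names, not part of this commit; header-length
bookkeeping and MSG_TRUNC handling elided) of how a datagram protocol's
recvmsg() is expected to drive the helpers above — receive, copy, free:

    /* Sketch only; assumes the usual 2.2-era recvmsg shape. */
    static int sketch_recvmsg(struct sock *sk, struct msghdr *msg,
                              int len, int noblock, int flags)
    {
            struct sk_buff *skb;
            int err, copied;

            skb = skb_recv_datagram(sk, flags, noblock, &err);
            if (skb == NULL)
                    return err;     /* -EAGAIN, -ERESTARTSYS, ... */

            copied = skb->len;
            if (copied > len)
                    copied = len;   /* truncate to the caller's buffer */

            err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

            skb_free_datagram(sk, skb);
            return err ? err : copied;
    }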
+
+/*
+ * Copy a datagram to a linear buffer.
+ */
+
+int skb_copy_datagram(struct sk_buff *skb, int offset, char *to, int size)
+{
+ int err = -EFAULT;
+
+ if (!copy_to_user(to, skb->h.raw + offset, size))
+ err = 0;
+ return err;
+}
+
+
+/*
+ * Copy a datagram to an iovec.
+ * Note: the iovec is modified during the copy.
+ */
+
+int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to,
+ int size)
+{
+ return memcpy_toiovec(to, skb->h.raw + offset, size);
+}
+
+/*
+ * Datagram poll: Again totally generic. This also handles
+ * sequenced packet sockets providing the socket receive queue
+ * is only ever holding data ready to receive.
+ *
+ * Note: when you _don't_ use this routine for this protocol,
+ * and you use a different write policy from sock_writeable()
+ * then please supply your own write_space callback.
+ */
+
+unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ unsigned int mask;
+
+ poll_wait(file, sk->sleep, wait);
+ mask = 0;
+
+ /* exceptional events? */
+ if (sk->err || !skb_queue_empty(&sk->error_queue))
+ mask |= POLLERR;
+ if (sk->shutdown & RCV_SHUTDOWN)
+ mask |= POLLHUP;
+
+ /* readable? */
+ if (!skb_queue_empty(&sk->receive_queue))
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+ if (connection_based(sk)) {
+ if (sk->state==TCP_CLOSE)
+ mask |= POLLHUP;
+ /* connection hasn't started yet? */
+ if (sk->state == TCP_SYN_SENT)
+ return mask;
+ }
+
+ /* writable? */
+ if (sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ sk->socket->flags |= SO_NOSPACE;
+
+ return mask;
+}
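
As a usage note (hypothetical helper, not from this commit): a caller that
only wants an immediate readability check can pass a NULL poll_table, since
poll_wait() of this era ignores a NULL table:

    /* Sketch: nonzero when recvmsg() on a datagram socket won't block. */
    static int sketch_sock_readable(struct file *file, struct socket *sock)
    {
            unsigned int mask = datagram_poll(file, sock, NULL);
            return (mask & (POLLIN | POLLRDNORM)) != 0;
    }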
diff --git a/pfinet/linux-src/net/core/dev.c b/pfinet/linux-src/net/core/dev.c
new file mode 100644
index 00000000..cc9584a1
--- /dev/null
+++ b/pfinet/linux-src/net/core/dev.c
@@ -0,0 +1,2026 @@
+/*
+ * NET3 Protocol independent device support routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Derived from the non IP parts of dev.c 1.0.19
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ * Additional Authors:
+ * Florian la Roche <rzsfl@rz.uni-sb.de>
+ * Alan Cox <gw4pts@gw4pts.ampr.org>
+ * David Hinds <dhinds@allegro.stanford.edu>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ * Adam Sulmicki <adam@cfar.umd.edu>
+ *
+ * Changes:
+ * Marcelo Tosatti <marcelo@conectiva.com.br> : don't accept mtu 0 or less
+ * Alan Cox : device private ioctl copies fields back.
+ * Alan Cox : Transmit queue code does relevant stunts to
+ * keep the queue safe.
+ * Alan Cox : Fixed double lock.
+ * Alan Cox : Fixed promisc NULL pointer trap
+ * ???????? : Support the full private ioctl range
+ * Alan Cox : Moved ioctl permission check into drivers
+ * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
+ * Alan Cox : 100 backlog just doesn't cut it when
+ * you start doing multicast video 8)
+ * Alan Cox : Rewrote net_bh and list manager.
+ * Alan Cox : Fix ETH_P_ALL echoback lengths.
+ * Alan Cox : Took out transmit every packet pass
+ * Saved a few bytes in the ioctl handler
+ * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
+ * a function call a packet.
+ * Alan Cox : Hashed net_bh()
+ * Richard Kooijman: Timestamp fixes.
+ * Alan Cox : Wrong field in SIOCGIFDSTADDR
+ * Alan Cox : Device lock protection.
+ * Alan Cox : Fixed nasty side effect of device close changes.
+ * Rudi Cilibrasi : Pass the right thing to set_mac_address()
+ * Dave Miller : 32bit quantity for the device lock to make it work out
+ * on a Sparc.
+ * Bjorn Ekwall : Added KERNELD hack.
+ * Alan Cox : Cleaned up the backlog initialise.
+ * Craig Metz : SIOCGIFCONF fix if space for under
+ * 1 device.
+ * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
+ * is no device open function.
+ * Andi Kleen : Fix error reporting for SIOCGIFCONF
+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
+ * Cyrus Durgin : Cleaned for KMOD
+ * Adam Sulmicki : Bug Fix : Network Device Unload
+ * A network device unload needs to purge
+ * the backlog queue.
+ * Paul Rusty Russel : SIOCSIFNAME
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <net/slhc.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <net/br.h>
+#include <net/dst.h>
+#include <net/pkt_sched.h>
+#include <net/profile.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h>
+#endif /* CONFIG_NET_RADIO */
+#ifdef CONFIG_PLIP
+extern int plip_init(void);
+#endif
+
+NET_PROFILE_DEFINE(dev_queue_xmit)
+NET_PROFILE_DEFINE(net_bh)
+NET_PROFILE_DEFINE(net_bh_skb)
+
+
+const char *if_port_text[] = {
+ "unknown",
+ "BNC",
+ "10baseT",
+ "AUI",
+ "100baseT",
+ "100baseTX",
+ "100baseFX"
+};
+
+/*
+ * The list of packet types we will receive (as opposed to discard)
+ * and the routines to invoke.
+ *
+ * Why 16? Because with 16 the only overlap we get on a hash of the
+ * low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ * 0800 IP
+ * 0001 802.3
+ * 0002 AX.25
+ * 0004 802.2
+ * 8035 RARP
+ * 0005 SNAP
+ * 0805 X.25
+ * 0806 ARP
+ * 8137 IPX
+ * 0009 Localtalk
+ * 86DD IPv6
+ */
+
+struct packet_type *ptype_base[16]; /* 16 way hashed list */
+struct packet_type *ptype_all = NULL; /* Taps */
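
To make the bucket arithmetic concrete (illustrative helper, not in this
commit): the hash is the low nibble of the host-order protocol value, so RARP
(0x8035), SNAP (0x0005) and X.25 (0x0805) all land in bucket 5 — the one
collision noted above:

    /* Sketch of the bucket computation used by dev_add_pack() below. */
    static __inline__ int sketch_ptype_hash(unsigned short type)
    {
            return ntohs(type) & 15;   /* e.g. ETH_P_IP 0x0800 -> 0 */
    }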
+
+/*
+ * Device list lock. Setting it provides that interface
+ * will not disappear unexpectedly while kernel sleeps.
+ */
+
+atomic_t dev_lockct = ATOMIC_INIT(0);
+
+/*
+ * Our notifier list
+ */
+
+static struct notifier_block *netdev_chain=NULL;
+
+/*
+ * Device drivers call our routines to queue packets here. We empty the
+ * queue in the bottom half handler.
+ */
+
+static struct sk_buff_head backlog;
+
+#ifdef CONFIG_NET_FASTROUTE
+int netdev_fastroute;
+int netdev_fastroute_obstacles;
+struct net_fastroute_stats dev_fastroute_stat;
+#endif
+
+static void dev_clear_backlog(struct device *dev);
+
+
+/******************************************************************************************
+
+ Protocol management and registration routines
+
+*******************************************************************************************/
+
+/*
+ * For efficiency
+ */
+
+int netdev_nit=0;
+
+/*
+ * Add a protocol ID to the list. Now that the input handler is
+ * smarter we can dispense with all the messy stuff that used to be
+ * here.
+ *
+ * BEWARE!!! Protocol handlers, mangling input packets,
+ * MUST BE last in hash buckets and checking protocol handlers
+ * MUST start from promiscuous ptype_all chain in net_bh.
+ * It is true now, do not change it.
+ * Explanation follows: if a protocol handler that mangles packets
+ * were first on the list, it could not sense that the packet
+ * is cloned and should be copied-on-write, so it would
+ * change it and subsequent readers would get a broken packet.
+ * --ANK (980803)
+ */
+
+void dev_add_pack(struct packet_type *pt)
+{
+ int hash;
+#ifdef CONFIG_NET_FASTROUTE
+ /* Hack to detect packet socket */
+ if (pt->data) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(pt->dev);
+ }
+#endif
+ if(pt->type==htons(ETH_P_ALL))
+ {
+ netdev_nit++;
+ pt->next=ptype_all;
+ ptype_all=pt;
+ }
+ else
+ {
+ hash=ntohs(pt->type)&15;
+ pt->next = ptype_base[hash];
+ ptype_base[hash] = pt;
+ }
+}
+
+
+/*
+ * Remove a protocol ID from the list.
+ */
+
+void dev_remove_pack(struct packet_type *pt)
+{
+ struct packet_type **pt1;
+ if(pt->type==htons(ETH_P_ALL))
+ {
+ netdev_nit--;
+ pt1=&ptype_all;
+ }
+ else
+ pt1=&ptype_base[ntohs(pt->type)&15];
+ for(; (*pt1)!=NULL; pt1=&((*pt1)->next))
+ {
+ if(pt==(*pt1))
+ {
+ *pt1=pt->next;
+ synchronize_bh();
+#ifdef CONFIG_NET_FASTROUTE
+ if (pt->data)
+ netdev_fastroute_obstacles--;
+#endif
+ return;
+ }
+ }
+ printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
+}
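
A sketch of a consumer (hypothetical names, assuming the packet_type layout
used throughout this file): an ETH_P_ALL tap sees every frame and owns each
skb it is handed:

    /* Sketch only, not part of this commit. */
    static int sketch_tap_rcv(struct sk_buff *skb, struct device *dev,
                              struct packet_type *pt)
    {
            /* inspect skb here; the handler must consume it */
            kfree_skb(skb);
            return 0;
    }

    static struct packet_type sketch_tap = {
            __constant_htons(ETH_P_ALL),  /* type: tap everything */
            NULL,                         /* dev: any device */
            sketch_tap_rcv,               /* func */
            NULL,                         /* data; non-NULL marks a packet socket */
            NULL                          /* next */
    };

    /* dev_add_pack(&sketch_tap); ... dev_remove_pack(&sketch_tap); */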
+
+/*****************************************************************************************
+
+ Device Interface Subroutines
+
+******************************************************************************************/
+
+/*
+ * Find an interface by name.
+ */
+
+struct device *dev_get(const char *name)
+{
+ struct device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next)
+ {
+ if (strcmp(dev->name, name) == 0)
+ return(dev);
+ }
+ return NULL;
+}
+
+struct device * dev_get_by_index(int ifindex)
+{
+ struct device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next)
+ {
+ if (dev->ifindex == ifindex)
+ return(dev);
+ }
+ return NULL;
+}
+
+struct device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+ struct device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next)
+ {
+ if (dev->type == type &&
+ memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+ return(dev);
+ }
+ return(NULL);
+}
+
+/*
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. Not efficient for many devices, not called a lot..
+ */
+
+int dev_alloc_name(struct device *dev, const char *name)
+{
+ int i;
+ /*
+ * If you need over 100 please also fix the algorithm...
+ */
+ for(i=0;i<100;i++)
+ {
+ sprintf(dev->name,name,i);
+ if(dev_get(dev->name)==NULL)
+ return i;
+ }
+ return -ENFILE; /* Over 100 of the things .. bail out! */
+}
+
+struct device *dev_alloc(const char *name, int *err)
+{
+ struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL);
+ if(dev==NULL)
+ {
+ *err=-ENOBUFS;
+ return NULL;
+ }
+ dev->name=(char *)(dev+1); /* Name string space */
+ *err=dev_alloc_name(dev,name);
+ if(*err<0)
+ {
+ kfree(dev);
+ return NULL;
+ }
+ return dev;
+}
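
For illustration (hypothetical wrapper, not in this commit), a driver wanting
the next free ethN slot combines the two routines above:

    /* Sketch: allocate a device with the first unused eth%d name. */
    static struct device *sketch_alloc_eth(int *err)
    {
            struct device *dev = dev_alloc("eth%d", err);  /* tries eth0..eth99 */
            /* dev == NULL means *err is -ENOBUFS or -ENFILE */
            return dev;
    }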
+
+void netdev_state_change(struct device *dev)
+{
+ if (dev->flags&IFF_UP)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+}
+
+
+/*
+ * Find and possibly load an interface.
+ */
+
+#ifdef CONFIG_KMOD
+
+void dev_load(const char *name)
+{
+ if(!dev_get(name) && capable(CAP_SYS_MODULE))
+ request_module(name);
+}
+
+#else
+
+extern inline void dev_load(const char *unused){;}
+
+#endif
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
+ return 1;
+}
+
+/*
+ * Prepare an interface for use.
+ */
+
+int dev_open(struct device *dev)
+{
+ int ret = 0;
+
+ /*
+ * Is it already up?
+ */
+
+ if (dev->flags&IFF_UP)
+ return 0;
+
+ /*
+ * Call device private open method
+ */
+
+ if (dev->open)
+ ret = dev->open(dev);
+
+ /*
+ * If it went open OK then:
+ */
+
+ if (ret == 0)
+ {
+ /*
+ * nil rebuild_header routine,
+ * that should be never called and used as just bug trap.
+ */
+
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+
+ /*
+ * Set the flags.
+ */
+ dev->flags |= (IFF_UP | IFF_RUNNING);
+
+ /*
+ * Initialize multicasting status
+ */
+ dev_mc_upload(dev);
+
+ /*
+ * Wakeup transmit queue engine
+ */
+ dev_activate(dev);
+
+ /*
+ * ... and announce new interface.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+
+ }
+ return(ret);
+}
+
+#ifdef CONFIG_NET_FASTROUTE
+
+static __inline__ void dev_do_clear_fastroute(struct device *dev)
+{
+ if (dev->accept_fastpath) {
+ int i;
+
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
+ dst_release_irqwait(xchg(dev->fastpath+i, NULL));
+ }
+}
+
+void dev_clear_fastroute(struct device *dev)
+{
+ if (dev) {
+ dev_do_clear_fastroute(dev);
+ } else {
+ for (dev = dev_base; dev; dev = dev->next)
+ dev_do_clear_fastroute(dev);
+ }
+}
+#endif
+
+/*
+ * Completely shutdown an interface.
+ */
+
+int dev_close(struct device *dev)
+{
+ if (!(dev->flags&IFF_UP))
+ return 0;
+
+ dev_deactivate(dev);
+
+ dev_lock_wait();
+
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ */
+
+ if (dev->stop)
+ dev->stop(dev);
+
+ if (dev->start)
+ printk("dev_close: bug %s still running\n", dev->name);
+
+ /*
+ * Device is now down.
+ */
+ dev_clear_backlog(dev);
+
+ dev->flags&=~(IFF_UP|IFF_RUNNING);
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
+
+ /*
+ * Tell people we are going down
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+ return(0);
+}
+
+
+/*
+ * Device change register/unregister. These are not inline or static
+ * as we export them to the world.
+ */
+
+int register_netdevice_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_register(&netdev_chain, nb);
+}
+
+int unregister_netdevice_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_unregister(&netdev_chain,nb);
+}
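
A subscriber sketch (hypothetical, assuming the notifier_block layout of this
era — callback, next, priority):

    /* Sketch only, not part of this commit. */
    static int sketch_netdev_event(struct notifier_block *self,
                                   unsigned long event, void *ptr)
    {
            struct device *dev = (struct device *)ptr;

            if (event == NETDEV_UP)
                    printk(KERN_DEBUG "%s is up\n", dev->name);
            return NOTIFY_DONE;
    }

    static struct notifier_block sketch_notifier = {
            sketch_netdev_event,    /* notifier_call */
            NULL,                   /* next */
            0                       /* priority */
    };

    /* register_netdevice_notifier(&sketch_notifier); */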
+
+/*
+ * Support routine. Sends outgoing frames to any network
+ * taps currently in use.
+ */
+
+void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
+{
+ struct packet_type *ptype;
+ get_fast_time(&skb->stamp);
+
+ for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
+ {
+ /* Never send packets back to the socket
+ * they originated from - MvS (miquels@drinkel.ow.org)
+ */
+ if ((ptype->dev == dev || !ptype->dev) &&
+ ((struct sock *)ptype->data != skb->sk))
+ {
+ struct sk_buff *skb2;
+ if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
+ break;
+
+ /* The code following below is wrong.
+
+ The only reason why it does work is that
+ ONLY packet sockets receive outgoing
+ packets. If such a packet were (occasionally)
+ received by a normal packet handler, which expects
+ that the mac header is pulled...
+ */
+
+ /* More sensible variant. skb->nh should be correctly
+ set by sender, so that the second statement is
+ just protection against buggy protocols.
+ */
+ skb2->mac.raw = skb2->data;
+
+ if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
+ skb2->nh.raw = skb2->data;
+ if (dev->hard_header)
+ skb2->nh.raw += dev->hard_header_len;
+ }
+
+ skb2->h.raw = skb2->nh.raw;
+ skb2->pkt_type = PACKET_OUTGOING;
+ ptype->func(skb2, skb->dev, ptype);
+ }
+ }
+}
+
+/*
+ * Fast path for loopback frames.
+ */
+
+void dev_loopback_xmit(struct sk_buff *skb)
+{
+ struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC);
+ if (newskb==NULL)
+ return;
+
+ newskb->mac.raw = newskb->data;
+ skb_pull(newskb, newskb->nh.raw - newskb->data);
+ newskb->pkt_type = PACKET_LOOPBACK;
+ newskb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (newskb->dst==NULL)
+ printk(KERN_DEBUG "BUG: packet without dst looped back 1\n");
+ netif_rx(newskb);
+}
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+ struct device *dev = skb->dev;
+ struct Qdisc *q;
+
+#ifdef CONFIG_NET_PROFILE
+ start_bh_atomic();
+ NET_PROFILE_ENTER(dev_queue_xmit);
+#endif
+
+ start_bh_atomic();
+ q = dev->qdisc;
+ if (q->enqueue) {
+ q->enqueue(skb, q);
+ qdisc_wakeup(dev);
+ end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
+ return 0;
+ }
+
+ /* The device has no queue. Common case for software devices:
+ loopback, all the sorts of tunnels...
+
+ Really, it is unlikely that bh protection is necessary here:
+ virtual devices do not generate EOI events.
+ However, it is possible, that they rely on bh protection
+ made by us here.
+ */
+ if (dev->flags&IFF_UP) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb,dev);
+ if (dev->hard_start_xmit(skb, dev) == 0) {
+ end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
+ return 0;
+ }
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
+ }
+ end_bh_atomic();
+
+ kfree_skb(skb);
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
+ return 0;
+}
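
The caller contract, as a sketch (hypothetical; real senders reach this
through protocol output routines such as ip_output): skb->dev must be set
beforehand, and the skb is consumed on every path:

    /* Sketch only, not part of this commit. */
    static void sketch_xmit(struct device *dev, struct sk_buff *skb)
    {
            skb->dev = dev;         /* queueing and taps key off this */
            skb->priority = 1;      /* consulted by some queueing disciplines */
            dev_queue_xmit(skb);    /* consumes skb; returns 0 here */
    }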
+
+
+/*=======================================================================
+ Receiver routines
+ =======================================================================*/
+
+int netdev_dropping = 0;
+int netdev_max_backlog = 300;
+atomic_t netdev_rx_dropped;
+#ifdef CONFIG_CPU_IS_SLOW
+int net_cpu_congestion;
+#endif
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+int netdev_throttle_events;
+static unsigned long netdev_fc_mask = 1;
+unsigned long netdev_fc_xoff = 0;
+
+static struct
+{
+ void (*stimul)(struct device *);
+ struct device *dev;
+} netdev_fc_slots[32];
+
+int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
+{
+ int bit = 0;
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (netdev_fc_mask != ~0UL) {
+ bit = ffz(netdev_fc_mask);
+ netdev_fc_slots[bit].stimul = stimul;
+ netdev_fc_slots[bit].dev = dev;
+ set_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ restore_flags(flags);
+ return bit;
+}
+
+void netdev_unregister_fc(int bit)
+{
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (bit > 0) {
+ netdev_fc_slots[bit].stimul = NULL;
+ netdev_fc_slots[bit].dev = NULL;
+ clear_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ restore_flags(flags);
+}
+
+static void netdev_wakeup(void)
+{
+ unsigned long xoff;
+
+ cli();
+ xoff = netdev_fc_xoff;
+ netdev_fc_xoff = 0;
+ netdev_dropping = 0;
+ netdev_throttle_events++;
+ while (xoff) {
+ int i = ffz(~xoff);
+ xoff &= ~(1<<i);
+ netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
+ }
+ sti();
+}
+#endif
+
+static void dev_clear_backlog(struct device *dev)
+{
+ struct sk_buff *prev, *curr;
+
+ /*
+ *
+ * Now let's clear the backlog queue. -AS
+ *
+ * We are competing here both with netif_rx() and net_bh().
+ * We don't want either of those to mess with skb ptrs
+ * while we work on them, thus cli()/sti().
+ *
+ * It looks better to use net_bh trick, at least
+ * to be sure, that we keep interrupt latency really low. --ANK (980727)
+ */
+
+ if (backlog.qlen) {
+ start_bh_atomic();
+ curr = backlog.next;
+ while ( curr != (struct sk_buff *)(&backlog) ) {
+ unsigned long flags;
+ curr=curr->next;
+ if ( curr->prev->dev == dev ) {
+ prev = curr->prev;
+ spin_lock_irqsave(&skb_queue_lock, flags);
+ __skb_unlink(prev, &backlog);
+ spin_unlock_irqrestore(&skb_queue_lock, flags);
+ kfree_skb(prev);
+ }
+ }
+ end_bh_atomic();
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ }
+}
+
+/*
+ * Receive a packet from a device driver and queue it for the upper
+ * (protocol) levels. It always succeeds.
+ */
+
+void netif_rx(struct sk_buff *skb)
+{
+#ifndef CONFIG_CPU_IS_SLOW
+ if(skb->stamp.tv_sec==0)
+ get_fast_time(&skb->stamp);
+#else
+ skb->stamp = xtime;
+#endif
+
+ /* The code is rearranged so that the path is
+ shortest when the CPU is congested but still operating.
+ */
+
+ if (backlog.qlen <= netdev_max_backlog) {
+ if (backlog.qlen) {
+ if (netdev_dropping == 0) {
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
+ return;
+ }
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
+ return;
+ }
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
+ return;
+ }
+ netdev_dropping = 1;
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
+}
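
From the driver side the contract is small; a hypothetical ethernet receive
fragment (names assumed; eth_type_trans() is the usual helper of this era)
looks like:

    /* Sketch only, not part of this commit. */
    static void sketch_rx(struct device *dev, char *data, int len)
    {
            struct sk_buff *skb = dev_alloc_skb(len + 2);

            if (skb == NULL)
                    return;                 /* drop on memory pressure */
            skb_reserve(skb, 2);            /* longword-align the IP header */
            memcpy(skb_put(skb, len), data, len);
            skb->dev = dev;
            skb->protocol = eth_type_trans(skb, dev);
            netif_rx(skb);                  /* queue for net_bh; never fails */
    }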
+
+#ifdef CONFIG_BRIDGE
+static inline void handle_bridge(struct sk_buff *skb, unsigned short type)
+{
+ if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type)))
+ {
+ /*
+ * We pass the bridge a complete frame. This means
+ * recovering the MAC header first.
+ */
+
+ int offset;
+
+ skb=skb_clone(skb, GFP_ATOMIC);
+ if(skb==NULL)
+ return;
+
+ offset=skb->data-skb->mac.raw;
+ skb_push(skb,offset); /* Put header back on for bridge */
+
+ if(br_receive_frame(skb))
+ return;
+ kfree_skb(skb);
+ }
+ return;
+}
+#endif
+
+
+/*
+ * When we are called the queue is ready to grab, the interrupts are
+ * on and hardware can interrupt and queue to the receive queue as we
+ * run with no problems.
+ * This is run as a bottom half after an interrupt handler that does
+ * mark_bh(NET_BH);
+ */
+
+void net_bh(void)
+{
+ struct packet_type *ptype;
+ struct packet_type *pt_prev;
+ unsigned short type;
+ unsigned long start_time = jiffies;
+#ifdef CONFIG_CPU_IS_SLOW
+ static unsigned long start_busy = 0;
+ static unsigned long ave_busy = 0;
+
+ if (start_busy == 0)
+ start_busy = start_time;
+ net_cpu_congestion = ave_busy>>8;
+#endif
+
+ NET_PROFILE_ENTER(net_bh);
+ /*
+ * Can we send anything now? We want to clear the
+ * decks for any more sends that get done as we
+ * process the input. This also minimises the
+ * latency on a transmit interrupt bh.
+ */
+
+ if (qdisc_head.forw != &qdisc_head)
+ qdisc_run_queues();
+
+ /*
+ * Any data left to process. This may occur because a
+ * mark_bh() is done after we empty the queue including
+ * that from the device which does a mark_bh() just after
+ */
+
+ /*
+ * While the queue is not empty..
+ *
+ * Note that the queue never shrinks due to
+ * an interrupt, so we can do this test without
+ * disabling interrupts.
+ */
+
+ while (!skb_queue_empty(&backlog))
+ {
+ struct sk_buff * skb;
+
+ /* Give chance to other bottom halves to run */
+ if (jiffies - start_time > 1)
+ goto net_bh_break;
+
+ /*
+ * We have a packet. Therefore the queue has shrunk
+ */
+ skb = skb_dequeue(&backlog);
+
+#ifdef CONFIG_CPU_IS_SLOW
+ if (ave_busy > 128*16) {
+ kfree_skb(skb);
+ while ((skb = skb_dequeue(&backlog)) != NULL)
+ kfree_skb(skb);
+ break;
+ }
+#endif
+
+
+#if 0
+ NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ if (skb->pkt_type == PACKET_FASTROUTE) {
+ dev_queue_xmit(skb);
+ continue;
+ }
+#endif
+
+ /*
+ * Bump the pointer to the next structure.
+ *
+ * On entry to the protocol layer. skb->data and
+ * skb->nh.raw point to the MAC and encapsulated data
+ */
+
+ /* XXX until we figure out every place to modify.. */
+ skb->h.raw = skb->nh.raw = skb->data;
+
+ if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) {
+ printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol);
+ kfree_skb(skb);
+ continue;
+ }
+
+ /*
+ * Fetch the packet protocol ID.
+ */
+
+ type = skb->protocol;
+
+#ifdef CONFIG_BRIDGE
+ /*
+ * If we are bridging then pass the frame up to the
+ * bridging code (if this protocol is to be bridged).
+ * If it is bridged then move on
+ */
+ handle_bridge(skb, type);
+#endif
+
+ /*
+ * We got a packet ID. Now loop over the "known protocols"
+ * list. There are two lists. The ptype_all list of taps (normally empty)
+ * and the main protocol list which is hashed perfectly for normal protocols.
+ */
+
+ pt_prev = NULL;
+ for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next)
+ {
+ if (!ptype->dev || ptype->dev == skb->dev) {
+ if(pt_prev)
+ {
+ struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
+ if(skb2)
+ pt_prev->func(skb2,skb->dev, pt_prev);
+ }
+ pt_prev=ptype;
+ }
+ }
+
+ for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next)
+ {
+ if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev))
+ {
+ /*
+ * We already have a match queued. Deliver
+ * to it and then remember the new match
+ */
+ if(pt_prev)
+ {
+ struct sk_buff *skb2;
+
+ skb2=skb_clone(skb, GFP_ATOMIC);
+
+ /*
+ * Kick the protocol handler. This should be fast
+ * and efficient code.
+ */
+
+ if(skb2)
+ pt_prev->func(skb2, skb->dev, pt_prev);
+ }
+ /* Remember the current last to do */
+ pt_prev=ptype;
+ }
+ } /* End of protocol list loop */
+
+ /*
+ * Is there a last item to send to ?
+ */
+
+ if(pt_prev)
+ pt_prev->func(skb, skb->dev, pt_prev);
+ /*
+ * Has an unknown packet been received?
+ */
+
+ else {
+ kfree_skb(skb);
+ }
+ } /* End of queue loop */
+
+ /*
+ * We have emptied the queue
+ */
+
+ /*
+ * One last output flush.
+ */
+
+ if (qdisc_head.forw != &qdisc_head)
+ qdisc_run_queues();
+
+#ifdef CONFIG_CPU_IS_SLOW
+ if (1) {
+ unsigned long start_idle = jiffies;
+ ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
+ start_busy = 0;
+ }
+#endif
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ NET_PROFILE_LEAVE(net_bh);
+ return;
+
+net_bh_break:
+ mark_bh(NET_BH);
+ NET_PROFILE_LEAVE(net_bh);
+ return;
+}
+
+/* Protocol dependent address dumping routines */
+
+static gifconf_func_t * gifconf_list [NPROTO];
+
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
+{
+ if (family>=NPROTO)
+ return -EINVAL;
+ gifconf_list[family] = gifconf;
+ return 0;
+}
+
+
+/*
+ * Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ * This call is useful, but I'd remove it too.
+ *
+ * The reason is purely aesthetic: it is the only call
+ * from SIOC* family using struct ifreq in reversed manner.
+ * Besides that, it is pretty silly to put "drawing" facility
+ * to kernel, it is useful only to print ifindices
+ * in readable form, is not it? --ANK
+ *
+ * We need this ioctl for efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+ struct device *dev;
+ struct ifreq ifr;
+ int err;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ err = copy_from_user(&ifr, arg, sizeof(struct ifreq));
+ if (err)
+ return -EFAULT;
+
+ dev = dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ strcpy(ifr.ifr_name, dev->name);
+
+ err = copy_to_user(arg, &ifr, sizeof(struct ifreq));
+ return (err)?-EFAULT:0;
+}
+
+/*
+ * Perform a SIOCGIFCONF call. This structure will change
+ * size eventually, and there is nothing I can do about it.
+ * Thus we will need a 'compatibility mode'.
+ */
+
+static int dev_ifconf(char *arg)
+{
+ struct ifconf ifc;
+ struct device *dev;
+ char *pos;
+ int len;
+ int total;
+ int i;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+ return -EFAULT;
+
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+
+ /*
+ * Loop over the interfaces, and write an info block for each.
+ */
+
+ total = 0;
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for (i=0; i<NPROTO; i++) {
+ if (gifconf_list[i]) {
+ int done;
+ if (pos==NULL) {
+ done = gifconf_list[i](dev, NULL, 0);
+ } else {
+ done = gifconf_list[i](dev, pos+total, len-total);
+ }
+ if (done<0)
+ return -EFAULT;
+ total += done;
+ }
+ }
+ }
+
+ /*
+ * All done. Write the updated control block back to the caller.
+ */
+ ifc.ifc_len = total;
+
+ if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
+ return -EFAULT;
+
+ /*
+ * Both BSD and Solaris return 0 here, so we do too.
+ */
+ return 0;
+}
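
Seen from user space (illustrative program fragment, not part of this
commit), the code above supports the usual two-pass convention — call once
with ifc_buf == NULL to learn the size, then again to fetch the records:

    /* Sketch of a user-space SIOCGIFCONF caller. */
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <stdlib.h>

    static int sketch_list_interfaces(int fd)
    {
            struct ifconf ifc;

            ifc.ifc_buf = NULL;                /* pass 1: size only */
            if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)
                    return -1;

            ifc.ifc_buf = malloc(ifc.ifc_len); /* pass 2: fill the buffer */
            if (ifc.ifc_buf == NULL || ioctl(fd, SIOCGIFCONF, &ifc) < 0)
                    return -1;

            /* ifc.ifc_len / sizeof(struct ifreq) entries are now valid */
            free(ifc.ifc_buf);
            return 0;
    }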
+
+/*
+ * This is invoked by the /proc filesystem handler to display a device
+ * in detail.
+ */
+
+#ifdef CONFIG_PROC_FS
+static int sprintf_stats(char *buffer, struct device *dev)
+{
+ struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
+ int size;
+
+ if (stats)
+ size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+ dev->name,
+ stats->rx_bytes,
+ stats->rx_packets, stats->rx_errors,
+ stats->rx_dropped + stats->rx_missed_errors,
+ stats->rx_fifo_errors,
+ stats->rx_length_errors + stats->rx_over_errors
+ + stats->rx_crc_errors + stats->rx_frame_errors,
+ stats->rx_compressed, stats->multicast,
+ stats->tx_bytes,
+ stats->tx_packets, stats->tx_errors, stats->tx_dropped,
+ stats->tx_fifo_errors, stats->collisions,
+ stats->tx_carrier_errors + stats->tx_aborted_errors
+ + stats->tx_window_errors + stats->tx_heartbeat_errors,
+ stats->tx_compressed);
+ else
+ size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
+
+ return size;
+}
+
+/*
+ * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
+ * to create /proc/net/dev
+ */
+
+int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len=0;
+ off_t begin=0;
+ off_t pos=0;
+ int size;
+
+ struct device *dev;
+
+
+ size = sprintf(buffer,
+ "Inter-| Receive | Transmit\n"
+ " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
+
+ pos+=size;
+ len+=size;
+
+
+ for (dev = dev_base; dev != NULL; dev = dev->next)
+ {
+ size = sprintf_stats(buffer+len, dev);
+ len+=size;
+ pos=begin+len;
+
+ if(pos<offset)
+ {
+ len=0;
+ begin=pos;
+ }
+ if(pos>offset+length)
+ break;
+ }
+
+ *start=buffer+(offset-begin); /* Start of wanted data */
+ len-=(offset-begin); /* Start slop */
+ if(len>length)
+ len=length; /* Ending slop */
+ return len;
+}
+
+static int dev_proc_stats(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ int len;
+
+ len = sprintf(buffer, "%08x %08x %08x %08x %08x\n",
+ atomic_read(&netdev_rx_dropped),
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ netdev_throttle_events,
+#else
+ 0,
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ dev_fastroute_stat.hits,
+ dev_fastroute_stat.succeed,
+ dev_fastroute_stat.deferred
+#else
+ 0, 0, 0
+#endif
+ );
+
+ len -= offset;
+
+ if (len > length)
+ len = length;
+ if(len < 0)
+ len = 0;
+
+ *start = buffer + offset;
+ *eof = 1;
+
+ return len;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+
+#ifdef CONFIG_NET_RADIO
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Print one entry of /proc/net/wireless
+ * This is a clone of /proc/net/dev (just above)
+ */
+static int sprintf_wireless_stats(char *buffer, struct device *dev)
+{
+ /* Get stats from the driver */
+ struct iw_statistics *stats = (dev->get_wireless_stats ?
+ dev->get_wireless_stats(dev) :
+ (struct iw_statistics *) NULL);
+ int size;
+
+ if(stats != (struct iw_statistics *) NULL)
+ size = sprintf(buffer,
+ "%6s: %02x %3d%c %3d%c %3d%c %5d %5d %5d\n",
+ dev->name,
+ stats->status,
+ stats->qual.qual,
+ stats->qual.updated & 1 ? '.' : ' ',
+ stats->qual.level,
+ stats->qual.updated & 2 ? '.' : ' ',
+ stats->qual.noise,
+ stats->qual.updated & 3 ? '.' : ' ',
+ stats->discard.nwid,
+ stats->discard.code,
+ stats->discard.misc);
+ else
+ size = 0;
+
+ return size;
+}
+
+/*
+ * Print info for /proc/net/wireless (print all entries)
+ * This is a clone of /proc/net/dev (just above)
+ */
+int dev_get_wireless_info(char * buffer, char **start, off_t offset,
+ int length, int dummy)
+{
+ int len = 0;
+ off_t begin = 0;
+ off_t pos = 0;
+ int size;
+
+ struct device * dev;
+
+ size = sprintf(buffer,
+ "Inter-|sta| Quality | Discarded packets\n"
+ " face |tus|link level noise| nwid crypt misc\n");
+
+ pos+=size;
+ len+=size;
+
+ for(dev = dev_base; dev != NULL; dev = dev->next)
+ {
+ size = sprintf_wireless_stats(buffer+len, dev);
+ len+=size;
+ pos=begin+len;
+
+ if(pos < offset)
+ {
+ len=0;
+ begin=pos;
+ }
+ if(pos > offset + length)
+ break;
+ }
+
+ *start = buffer + (offset - begin); /* Start of wanted data */
+ len -= (offset - begin); /* Start slop */
+ if(len > length)
+ len = length; /* Ending slop */
+
+ return len;
+}
+#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NET_RADIO */
+
+void dev_set_promiscuity(struct device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_PROMISC;
+ if ((dev->promiscuity += inc) == 0)
+ dev->flags &= ~IFF_PROMISC;
+ if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (dev->flags&IFF_PROMISC) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(dev);
+ } else
+ netdev_fastroute_obstacles--;
+#endif
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+ dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
+ }
+}
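
Because inc is added to a reference count, stacked users compose; a pairing
sketch (hypothetical, not part of this commit):

    /* Sketch only: promiscuity is refcounted, so +1 must pair with -1. */
    static void sketch_capture(struct device *dev)
    {
            dev_set_promiscuity(dev, 1);    /* count 0 -> 1 sets IFF_PROMISC */
            /* ... sniff ... */
            dev_set_promiscuity(dev, -1);   /* back to 0 clears the flag */
    }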
+
+void dev_set_allmulti(struct device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_ALLMULTI;
+ if ((dev->allmulti += inc) == 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ if (dev->flags^old_flags)
+ dev_mc_upload(dev);
+}
+
+int dev_change_flags(struct device *dev, unsigned flags)
+{
+ int ret;
+ int old_flags = dev->flags;
+
+ /*
+ * Set the flags on our device.
+ */
+
+ dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP|
+ IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC|
+ IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
+ (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
+
+ /*
+ * Load in the correct multicast list now the flags have changed.
+ */
+
+ dev_mc_upload(dev);
+
+ /*
+ * Have we downed the interface. We handle IFF_UP ourselves
+ * according to user attempts to set it, rather than blindly
+ * setting it.
+ */
+
+ ret = 0;
+ if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
+ {
+ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+ if (ret == 0)
+ dev_mc_upload(dev);
+ }
+
+ if (dev->flags&IFF_UP &&
+ ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+ if ((flags^dev->gflags)&IFF_PROMISC) {
+ int inc = (flags&IFF_PROMISC) ? +1 : -1;
+ dev->gflags ^= IFF_PROMISC;
+ dev_set_promiscuity(dev, inc);
+ }
+
+ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+ is important. Some (broken) drivers set IFF_PROMISC when
+ IFF_ALLMULTI is requested, without asking us and without reporting.
+ */
+ if ((flags^dev->gflags)&IFF_ALLMULTI) {
+ int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
+ dev->gflags ^= IFF_ALLMULTI;
+ dev_set_allmulti(dev, inc);
+ }
+
+ return ret;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls.
+ */
+
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+ struct device *dev;
+ int err;
+
+ if ((dev = dev_get(ifr->ifr_name)) == NULL)
+ return -ENODEV;
+
+ switch(cmd)
+ {
+ case SIOCGIFFLAGS: /* Get interface flags */
+ ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI))
+ |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
+ return 0;
+
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
+ ifr->ifr_metric = 0;
+ return 0;
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
+ return -EOPNOTSUPP;
+
+ case SIOCGIFMTU: /* Get the MTU of a device */
+ ifr->ifr_mtu = dev->mtu;
+ return 0;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ if (ifr->ifr_mtu == dev->mtu)
+ return 0;
+
+ /*
+ * MTU must be positive.
+ */
+
+ if (ifr->ifr_mtu<=0)
+ return -EINVAL;
+
+ if (dev->change_mtu)
+ err = dev->change_mtu(dev, ifr->ifr_mtu);
+ else {
+ dev->mtu = ifr->ifr_mtu;
+ err = 0;
+ }
+ if (!err && dev->flags&IFF_UP)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
+ return err;
+
+ case SIOCGIFHWADDR:
+ memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
+ ifr->ifr_hwaddr.sa_family=dev->type;
+ return 0;
+
+ case SIOCSIFHWADDR:
+ if(dev->set_mac_address==NULL)
+ return -EOPNOTSUPP;
+ if(ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ err=dev->set_mac_address(dev,&ifr->ifr_hwaddr);
+ if (!err)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return err;
+
+ case SIOCSIFHWBROADCAST:
+ if(ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return 0;
+
+ case SIOCGIFMAP:
+ ifr->ifr_map.mem_start=dev->mem_start;
+ ifr->ifr_map.mem_end=dev->mem_end;
+ ifr->ifr_map.base_addr=dev->base_addr;
+ ifr->ifr_map.irq=dev->irq;
+ ifr->ifr_map.dma=dev->dma;
+ ifr->ifr_map.port=dev->if_port;
+ return 0;
+
+ case SIOCSIFMAP:
+ if (dev->set_config)
+ return dev->set_config(dev,&ifr->ifr_map);
+ return -EOPNOTSUPP;
+
+ case SIOCADDMULTI:
+ if(dev->set_multicast_list==NULL ||
+ ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
+ return -EINVAL;
+ dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
+ return 0;
+
+ case SIOCDELMULTI:
+ if(dev->set_multicast_list==NULL ||
+ ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
+ return -EINVAL;
+ dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
+ return 0;
+
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ case SIOCSIFTXQLEN:
+ if(ifr->ifr_qlen<0)
+ return -EINVAL;
+ dev->tx_queue_len = ifr->ifr_qlen;
+ return 0;
+
+ case SIOCSIFNAME:
+ if (dev->flags&IFF_UP)
+ return -EBUSY;
+ if (dev_get(ifr->ifr_newname))
+ return -EEXIST;
+ memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
+ dev->name[IFNAMSIZ-1] = 0;
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ return 0;
+
+ /*
+ * Unknown or private ioctl
+ */
+
+ default:
+ if(cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) {
+ if (dev->do_ioctl)
+ return dev->do_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+ }
+
+#ifdef CONFIG_NET_RADIO
+ if(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ if (dev->do_ioctl)
+ return dev->do_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+ }
+#endif /* CONFIG_NET_RADIO */
+
+ }
+ return -EINVAL;
+}
+
+
+/*
+ * This function handles all "interface"-type I/O control requests. The actual
+ * 'doing' part of this is dev_ifsioc above.
+ */
+
+int dev_ioctl(unsigned int cmd, void *arg)
+{
+ struct ifreq ifr;
+ int ret;
+ char *colon;
+
+ /* One special case: SIOCGIFCONF takes ifconf argument
+ and requires shared lock, because it sleeps writing
+ to user space.
+ */
+
+ if (cmd == SIOCGIFCONF) {
+ rtnl_shlock();
+ ret = dev_ifconf((char *) arg);
+ rtnl_shunlock();
+ return ret;
+ }
+ if (cmd == SIOCGIFNAME) {
+ return dev_ifname((struct ifreq *)arg);
+ }
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+ colon = strchr(ifr.ifr_name, ':');
+ if (colon)
+ *colon = 0;
+
+ /*
+ * See which interface the caller is talking about.
+ */
+
+ switch(cmd)
+ {
+ /*
+ * These ioctl calls:
+ * - can be done by all.
+ * - atomic and do not require locking.
+ * - return a value
+ */
+
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFHWADDR:
+ case SIOCGIFSLAVE:
+ case SIOCGIFMAP:
+ case SIOCGIFINDEX:
+ case SIOCGIFTXQLEN:
+ dev_load(ifr.ifr_name);
+ ret = dev_ifsioc(&ifr, cmd);
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
+
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+ case SIOCSIFMTU:
+ case SIOCSIFMAP:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ case SIOCSIFHWBROADCAST:
+ case SIOCSIFTXQLEN:
+ case SIOCSIFNAME:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ dev_load(ifr.ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ return ret;
+
+ case SIOCGIFMEM:
+ /* Get the per device memory space. We can add this but currently
+ do not support it */
+ case SIOCSIFMEM:
+ /* Set the per device memory buffer space. Not applicable in our case */
+ case SIOCSIFLINK:
+ return -EINVAL;
+
+ /*
+ * Unknown or private ioctl.
+ */
+
+ default:
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) {
+ dev_load(ifr.ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#ifdef CONFIG_NET_RADIO
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ dev_load(ifr.ifr_name);
+ if (IW_IS_SET(cmd)) {
+ if (!suser())
+ return -EPERM;
+ rtnl_lock();
+ }
+ ret = dev_ifsioc(&ifr, cmd);
+ if (IW_IS_SET(cmd))
+ rtnl_unlock();
+ if (!ret && IW_IS_GET(cmd) &&
+ copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#endif /* CONFIG_NET_RADIO */
+ return -EINVAL;
+ }
+}
+
+int dev_new_index(void)
+{
+ static int ifindex;
+ for (;;) {
+ if (++ifindex <= 0)
+ ifindex=1;
+ if (dev_get_by_index(ifindex) == NULL)
+ return ifindex;
+ }
+}
+
+static int dev_boot_phase = 1;
+
+
+int register_netdevice(struct device *dev)
+{
+ struct device *d, **dp;
+
+ if (dev_boot_phase) {
+ /* This is NOT a bug, but I am not sure that all the
+ devices initialized before the netdev module is started
+ are sane.
+
+ Now they are chained to the device boot list
+ and probed later. If a module is initialized
+ before netdev, but assumes that dev->init
+ is really called by register_netdev(), it will fail.
+
+ So this message should be printed for a while.
+ */
+ printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
+
+ /* Check for existence, and append to tail of chain */
+ for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev || strcmp(d->name, dev->name) == 0)
+ return -EEXIST;
+ }
+ dev->next = NULL;
+ *dp = dev;
+ return 0;
+ }
+
+ dev->iflink = -1;
+
+ /* Init, if this function is available */
+ if (dev->init && dev->init(dev) != 0)
+ return -EIO;
+
+ /* Check for existence, and append to tail of chain */
+ for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev || strcmp(d->name, dev->name) == 0)
+ return -EEXIST;
+ }
+ dev->next = NULL;
+ dev_init_scheduler(dev);
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+ *dp = dev;
+
+ /* Notify protocols, that a new device appeared. */
+ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+ return 0;
+}
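
A registration sketch (hypothetical software device, showing only the fields
this function consults; note that struct device's name is a char pointer in
this tree):

    /* Sketch only, not part of this commit. */
    static int sketch_dev_init(struct device *dev)
    {
            dev->mtu = 1500;        /* real drivers probe hardware here */
            return 0;               /* nonzero makes registration fail -EIO */
    }

    static struct device sketch_dev = { "sketch0", };

    static int sketch_register(void)
    {
            sketch_dev.init = sketch_dev_init;
            return register_netdevice(&sketch_dev);  /* -EEXIST on name clash */
    }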
+
+int unregister_netdevice(struct device *dev)
+{
+ struct device *d, **dp;
+
+ if (dev_boot_phase == 0) {
+ /* If the device is running, close it.
+ It is a very bad idea; really we should
+ complain loudly here, but random hackery
+ in linux/drivers/net likes it.
+ */
+ if (dev->flags & IFF_UP)
+ dev_close(dev);
+
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ /* Notify protocols, that we are about to destroy
+ this device. They should clean all the things.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the multicast chain
+ */
+ dev_mc_discard(dev);
+
+ /* To avoid pointers pointing to nowhere,
+ we wait for the end of the critical section */
+ dev_lock_wait();
+ }
+
+ /* And unlink it from device chain. */
+ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev) {
+ *dp = d->next;
+ synchronize_bh();
+ d->next = NULL;
+
+ if (dev->destructor)
+ dev->destructor(dev);
+ return 0;
+ }
+ }
+ return -ENODEV;
+}
+
+
+/*
+ * Initialize the DEV module. At boot time this walks the device list and
+ * unhooks any devices that fail to initialise (normally hardware not
+ * present) and leaves us with a valid list of present and active devices.
+ *
+ */
+extern int lance_init(void);
+extern int bpq_init(void);
+extern int scc_init(void);
+extern void sdla_setup(void);
+extern void dlci_setup(void);
+extern int dmascc_init(void);
+extern int sm_init(void);
+
+extern int baycom_ser_fdx_init(void);
+extern int baycom_ser_hdx_init(void);
+extern int baycom_par_init(void);
+
+extern int lapbeth_init(void);
+extern void arcnet_init(void);
+extern void ip_auto_config(void);
+#ifdef CONFIG_8xx
+extern int cpm_enet_init(void);
+#endif /* CONFIG_8xx */
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry proc_net_dev = {
+ PROC_NET_DEV, 3, "dev",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ dev_get_info
+};
+#endif
+
+#ifdef CONFIG_NET_RADIO
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry proc_net_wireless = {
+ PROC_NET_WIRELESS, 8, "wireless",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ dev_get_wireless_info
+};
+#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NET_RADIO */
+
+__initfunc(int net_dev_init(void))
+{
+ struct device *dev, **dp;
+
+#ifdef CONFIG_NET_SCHED
+ pktsched_init();
+#endif
+
+ /*
+ * Initialise the packet receive queue.
+ */
+
+ skb_queue_head_init(&backlog);
+
+ /*
+ * The bridge has to be up before the devices
+ */
+
+#ifdef CONFIG_BRIDGE
+ br_init();
+#endif
+
+ /*
+ * This is Very Ugly(tm).
+ *
+ * Some devices want to be initialized early..
+ */
+
+#if defined(CONFIG_SCC)
+ scc_init();
+#endif
+#if defined(CONFIG_DMASCC)
+ dmascc_init();
+#endif
+#if defined(CONFIG_BPQETHER)
+ bpq_init();
+#endif
+#if defined(CONFIG_DLCI)
+ dlci_setup();
+#endif
+#if defined(CONFIG_SDLA)
+ sdla_setup();
+#endif
+#if defined(CONFIG_BAYCOM_PAR)
+ baycom_par_init();
+#endif
+#if defined(CONFIG_BAYCOM_SER_FDX)
+ baycom_ser_fdx_init();
+#endif
+#if defined(CONFIG_BAYCOM_SER_HDX)
+ baycom_ser_hdx_init();
+#endif
+#if defined(CONFIG_SOUNDMODEM)
+ sm_init();
+#endif
+#if defined(CONFIG_LAPBETHER)
+ lapbeth_init();
+#endif
+#if defined(CONFIG_PLIP)
+ plip_init();
+#endif
+#if defined(CONFIG_ARCNET)
+ arcnet_init();
+#endif
+#if defined(CONFIG_8xx)
+ cpm_enet_init();
+#endif
+ /*
+ * SLHC if present needs attaching so other people see it
+ * even if not opened.
+ */
+
+#ifdef CONFIG_INET
+#if (defined(CONFIG_SLIP) && defined(CONFIG_SLIP_COMPRESSED)) \
+ || defined(CONFIG_PPP) \
+ || (defined(CONFIG_ISDN) && defined(CONFIG_ISDN_PPP))
+ slhc_install();
+#endif
+#endif
+
+#ifdef CONFIG_NET_PROFILE
+ net_profile_init();
+ NET_PROFILE_REGISTER(dev_queue_xmit);
+ NET_PROFILE_REGISTER(net_bh);
+#if 0
+ NET_PROFILE_REGISTER(net_bh_skb);
+#endif
+#endif
+ /*
+ * Add the devices.
+ * If the call to dev->init fails, the dev is removed
+ * from the chain disconnecting the device until the
+ * next reboot.
+ */
+
+ dp = &dev_base;
+ while ((dev = *dp) != NULL)
+ {
+ dev->iflink = -1;
+ if (dev->init && dev->init(dev))
+ {
+ /*
+ * It failed to come up. Unhook it.
+ */
+ *dp = dev->next;
+ synchronize_bh();
+ }
+ else
+ {
+ dp = &dev->next;
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+ dev_init_scheduler(dev);
+ }
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc_net_register(&proc_net_dev);
+ {
+ struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0);
+ ent->read_proc = dev_proc_stats;
+ }
+#endif
+
+#ifdef CONFIG_NET_RADIO
+#ifdef CONFIG_PROC_FS
+ proc_net_register(&proc_net_wireless);
+#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NET_RADIO */
+
+ init_bh(NET_BH, net_bh);
+
+ dev_boot_phase = 0;
+
+ dev_mcast_init();
+
+#ifdef CONFIG_IP_PNP
+ ip_auto_config();
+#endif
+
+ return 0;
+}
diff --git a/pfinet/linux-src/net/core/dev_mcast.c b/pfinet/linux-src/net/core/dev_mcast.c
new file mode 100644
index 00000000..bce3f4a4
--- /dev/null
+++ b/pfinet/linux-src/net/core/dev_mcast.c
@@ -0,0 +1,252 @@
+/*
+ * Linux NET3: Multicast List maintenance.
+ *
+ * Authors:
+ * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
+ * Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ * Stir fried together from the IP multicast and CAP patches above
+ * Alan Cox <Alan.Cox@linux.org>
+ *
+ * Fixes:
+ * Alan Cox : Update the device on a real delete
+ * rather than any time but...
+ * Alan Cox : IFF_ALLMULTI support.
+ * Alan Cox : New format set_multicast_list() calls.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+
+
+/*
+ * Device multicast list maintenance.
+ *
+ * This is used both by IP and by the user level maintenance functions.
+ * Unlike BSD we maintain a usage count on a given multicast address so
+ * that a casual user application can add/delete multicasts used by
+ * protocols without doing damage to the protocols when it deletes the
+ * entries. It also helps IP as it tracks overlapping maps.
+ *
+ * Device mc lists are changed by bh at least if IPv6 is enabled,
+ * so they must be bh protected.
+ */
+
+/*
+ * Update the multicast list into the physical NIC controller.
+ */
+
+void dev_mc_upload(struct device *dev)
+{
+ /* Don't do anything till we up the interface
+ [dev_open will call this function so the list will
+ stay sane] */
+
+ if(!(dev->flags&IFF_UP))
+ return;
+
+ /*
+ * Devices with no set multicast don't get set
+ */
+
+ if(dev->set_multicast_list==NULL)
+ return;
+
+ start_bh_atomic();
+ dev->set_multicast_list(dev);
+ end_bh_atomic();
+}
+
+/*
+ * Delete a device level multicast
+ */
+
+int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, **dmip;
+
+ start_bh_atomic();
+ for (dmip=&dev->mc_list; (dmi=*dmip)!=NULL; dmip=&dmi->next) {
+ /*
+ * Find the entry we want to delete. The device could
+ * have variable length entries so check these too.
+ */
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && alen==dmi->dmi_addrlen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 0;
+ if (old_glbl == 0)
+ break;
+ }
+ if(--dmi->dmi_users)
+ goto done;
+
+ /*
+ * Last user. So delete the entry.
+ */
+ *dmip = dmi->next;
+ dev->mc_count--;
+ kfree_s(dmi,sizeof(*dmi));
+ /*
+ * We have altered the list, so the filter
+ * loaded into the card is now wrong. Fix it.
+ */
+ end_bh_atomic();
+ dev_mc_upload(dev);
+ return 0;
+ }
+ }
+ err = -ENOENT;
+done:
+ end_bh_atomic();
+ return err;
+}
+
+/*
+ * Add a device level multicast
+ */
+
+int dev_mc_add(struct device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, *dmi1;
+
+ dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), gfp_any());
+
+ start_bh_atomic();
+ for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) {
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 1;
+ if (old_glbl)
+ goto done;
+ }
+ dmi->dmi_users++;
+ goto done;
+ }
+ }
+
+ if ((dmi=dmi1)==NULL) {
+ end_bh_atomic();
+ return -ENOMEM;
+ }
+ memcpy(dmi->dmi_addr, addr, alen);
+ dmi->dmi_addrlen=alen;
+ dmi->next=dev->mc_list;
+ dmi->dmi_users=1;
+ dmi->dmi_gusers=glbl ? 1 : 0;
+ dev->mc_list=dmi;
+ dev->mc_count++;
+ end_bh_atomic();
+ dev_mc_upload(dev);
+ return 0;
+
+done:
+ end_bh_atomic();
+ if (dmi1)
+ kfree(dmi1);
+ return err;
+}
+
+/*
+ * Discard the multicast list when a device is brought down
+ */
+
+void dev_mc_discard(struct device *dev)
+{
+ start_bh_atomic();
+ while (dev->mc_list!=NULL) {
+ struct dev_mc_list *tmp=dev->mc_list;
+ dev->mc_list=tmp->next;
+ if (tmp->dmi_users > tmp->dmi_gusers)
+ printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+ kfree_s(tmp,sizeof(*tmp));
+ }
+ dev->mc_count=0;
+ end_bh_atomic();
+}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0, begin=0;
+ struct dev_mc_list *m;
+ int len=0;
+ struct device *dev;
+
+ start_bh_atomic();
+
+ for (dev = dev_base; dev; dev = dev->next) {
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, dev->name,
+ m->dmi_users, m->dmi_gusers);
+
+ for (i=0; i<m->dmi_addrlen; i++)
+ len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+ len+=sprintf(buffer+len, "\n");
+
+ pos=begin+len;
+ if (pos < offset) {
+ len=0;
+ begin=pos;
+ }
+ if (pos > offset+length)
+ goto done;
+ }
+ }
+ *eof = 1;
+
+done:
+ end_bh_atomic();
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ if(len<0)
+ len=0;
+ return len;
+}
+#endif
+
+__initfunc(void dev_mcast_init(void))
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("net/dev_mcast", 0, 0);
+ ent->read_proc = dev_mc_read_proc;
+#endif
+}
+
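
dev_mc_add() and dev_mc_delete() above keep a per-address reference count:
dmi_users counts every adder, while dmi_gusers records at most one "global"
user so that repeated global adds collapse into a single reference. A hedged
user-space sketch of that counting discipline; mc_entry, mc_add() and
mc_del() are hypothetical names, and the address length is fixed at six
bytes for brevity:

#include <stdlib.h>
#include <string.h>

struct mc_entry {
        struct mc_entry *next;
        unsigned char addr[6];
        int users;      /* like dmi_users: one per add */
        int gusers;     /* like dmi_gusers: 0 or 1 "global" user */
};

static int mc_add(struct mc_entry **head, const unsigned char *addr, int glbl)
{
        struct mc_entry *e;

        for (e = *head; e; e = e->next) {
                if (memcmp(e->addr, addr, 6) != 0)
                        continue;
                if (glbl) {
                        if (e->gusers)
                                return 0;       /* global user counted once */
                        e->gusers = 1;
                }
                e->users++;
                return 0;
        }
        if ((e = malloc(sizeof(*e))) == NULL)
                return -1;
        memcpy(e->addr, addr, 6);
        e->users = 1;
        e->gusers = glbl ? 1 : 0;
        e->next = *head;
        *head = e;
        return 0;
}

static int mc_del(struct mc_entry **head, const unsigned char *addr, int glbl)
{
        struct mc_entry *e, **pp;

        for (pp = head; (e = *pp) != NULL; pp = &e->next) {
                if (memcmp(e->addr, addr, 6) != 0)
                        continue;
                if (glbl) {
                        int old = e->gusers;
                        e->gusers = 0;
                        if (old == 0)
                                break;          /* no global reference held */
                }
                if (--e->users)
                        return 0;               /* other users remain */
                *pp = e->next;                  /* last user: really remove */
                free(e);
                return 0;
        }
        return -1;                              /* like -ENOENT */
}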
diff --git a/pfinet/linux-src/net/core/dst.c b/pfinet/linux-src/net/core/dst.c
new file mode 100644
index 00000000..9007dde6
--- /dev/null
+++ b/pfinet/linux-src/net/core/dst.c
@@ -0,0 +1,145 @@
+/*
+ * net/dst.c Protocol independent destination cache.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/dst.h>
+
+struct dst_entry * dst_garbage_list;
+atomic_t dst_total = ATOMIC_INIT(0);
+
+static unsigned long dst_gc_timer_expires;
+static unsigned long dst_gc_timer_inc = DST_GC_MAX;
+static void dst_run_gc(unsigned long);
+
+static struct timer_list dst_gc_timer =
+ { NULL, NULL, DST_GC_MIN, 0L, dst_run_gc };
+
+#if RT_CACHE_DEBUG >= 2
+atomic_t hh_count;
+#endif
+
+static void dst_run_gc(unsigned long dummy)
+{
+ int delayed = 0;
+ struct dst_entry * dst, **dstp;
+
+ del_timer(&dst_gc_timer);
+ dstp = &dst_garbage_list;
+ while ((dst = *dstp) != NULL) {
+ if (atomic_read(&dst->use)) {
+ dstp = &dst->next;
+ delayed++;
+ continue;
+ }
+ *dstp = dst->next;
+ dst_destroy(dst);
+ }
+ if (!dst_garbage_list) {
+ dst_gc_timer_inc = DST_GC_MAX;
+ return;
+ }
+ if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+ dst_gc_timer_expires = DST_GC_MAX;
+ dst_gc_timer_inc += DST_GC_INC;
+ dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+#if RT_CACHE_DEBUG >= 2
+ printk("dst_total: %d/%d %ld\n",
+ atomic_read(&dst_total), delayed, dst_gc_timer_expires);
+#endif
+ add_timer(&dst_gc_timer);
+}
+
+static int dst_discard(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static int dst_blackhole(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+void * dst_alloc(int size, struct dst_ops * ops)
+{
+ struct dst_entry * dst;
+
+ if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc())
+ return NULL;
+ }
+ dst = kmalloc(size, GFP_ATOMIC);
+ if (!dst)
+ return NULL;
+ memset(dst, 0, size);
+ dst->ops = ops;
+ atomic_set(&dst->refcnt, 0);
+ dst->lastuse = jiffies;
+ dst->input = dst_discard;
+ dst->output = dst_blackhole;
+ atomic_inc(&dst_total);
+ atomic_inc(&ops->entries);
+ return dst;
+}
+
+void __dst_free(struct dst_entry * dst)
+{
+ start_bh_atomic();
+ /* The first case (dev==NULL) is required when
+ a protocol module is unloaded.
+ */
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ dst->input = dst_discard;
+ dst->output = dst_blackhole;
+ dst->dev = &loopback_dev;
+ }
+ dst->obsolete = 2;
+ dst->next = dst_garbage_list;
+ dst_garbage_list = dst;
+ if (dst_gc_timer_inc > DST_GC_INC) {
+ del_timer(&dst_gc_timer);
+ dst_gc_timer_inc = DST_GC_INC;
+ dst_gc_timer_expires = DST_GC_MIN;
+ dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+ add_timer(&dst_gc_timer);
+ }
+ end_bh_atomic();
+}
+
+void dst_destroy(struct dst_entry * dst)
+{
+ struct neighbour *neigh = dst->neighbour;
+ struct hh_cache *hh = dst->hh;
+
+ dst->hh = NULL;
+ if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
+
+ if (neigh) {
+ dst->neighbour = NULL;
+ neigh_release(neigh);
+ }
+
+ atomic_dec(&dst->ops->entries);
+
+ if (dst->ops->destroy)
+ dst->ops->destroy(dst);
+ atomic_dec(&dst_total);
+ kfree(dst);
+}
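
dst_run_gc() above reschedules itself with a growing interval: each pass
adds dst_gc_timer_inc to the next expiry, clamps the result at DST_GC_MAX,
then enlarges the increment by DST_GC_INC, so a garbage list that stays
busy is rescanned less and less often. A small sketch of that schedule;
the constants here are illustrative stand-ins, not the kernel's DST_GC_*
values:

#include <stdio.h>

#define HZ      100
#define GC_MIN  (1 * HZ)
#define GC_INC  (5 * HZ)
#define GC_MAX  (120 * HZ)

int main(void)
{
        unsigned long expires = GC_MIN, inc = GC_INC;
        int round;

        for (round = 0; round < 10; round++) {
                printf("round %d: next run in %lu ticks\n", round, expires);
                if ((expires += inc) > GC_MAX)  /* same clamp as dst_run_gc() */
                        expires = GC_MAX;
                inc += GC_INC;
        }
        return 0;
}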
diff --git a/pfinet/linux-src/net/core/filter.c b/pfinet/linux-src/net/core/filter.c
new file mode 100644
index 00000000..8e1ffb62
--- /dev/null
+++ b/pfinet/linux-src/net/core/filter.c
@@ -0,0 +1,454 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ * Jay Schulist <Jay.Schulist@spacs.k12.wi.us>
+ *
+ * Based on the design of:
+ * - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_FILTER)
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/filter.h>
+
+/* No hurry in this branch */
+
+static u8 *load_pointer(struct sk_buff *skb, int k)
+{
+ u8 *ptr = NULL;
+
+ if (k>=SKF_NET_OFF)
+ ptr = skb->nh.raw + k - SKF_NET_OFF;
+ else if (k>=SKF_LL_OFF)
+ ptr = skb->mac.raw + k - SKF_LL_OFF;
+
+ if (ptr >= skb->head && ptr < skb->tail)
+ return ptr;
+ return NULL;
+}
+
+/*
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * len is the number of filter blocks in the array.
+ */
+
+int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+{
+ unsigned char *data = skb->data;
+ /* len is UNSIGNED. Byte wide insns rely only on implicit
+ type casts to prevent reading arbitrary memory locations.
+ */
+ unsigned int len = skb->len;
+ struct sock_filter *fentry; /* We walk down these */
+ u32 A = 0; /* Accumulator */
+ u32 X = 0; /* Index Register */
+ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ int k;
+ int pc;
+
+ /*
+ * Process array of filter instructions.
+ */
+
+ for(pc = 0; pc < flen; pc++)
+ {
+ fentry = &filter[pc];
+
+ switch(fentry->code)
+ {
+ case BPF_ALU|BPF_ADD|BPF_X:
+ A += X;
+ continue;
+
+ case BPF_ALU|BPF_ADD|BPF_K:
+ A += fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ A -= X;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_K:
+ A -= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ A *= X;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_K:
+ A *= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ if(X == 0)
+ return (0);
+ A /= X;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_K:
+ if(fentry->k == 0)
+ return (0);
+ A /= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ A &= X;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_K:
+ A &= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ A |= X;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_K:
+ A |= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ A <<= X;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_K:
+ A <<= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ A >>= X;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_K:
+ A >>= fentry->k;
+ continue;
+
+ case BPF_ALU|BPF_NEG:
+ A = -A;
+ continue;
+
+ case BPF_JMP|BPF_JA:
+ pc += fentry->k;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ pc += (A > X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ pc += (A >= X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ pc += (A == X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ pc += (A & X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_LD|BPF_W|BPF_ABS:
+ k = fentry->k;
+load_w:
+ if(k+sizeof(u32) <= len) {
+ A = ntohl(*(u32*)&data[k]);
+ continue;
+ }
+ if (k<0) {
+ u8 *ptr;
+
+ if (k>=SKF_AD_OFF)
+ break;
+ if ((ptr = load_pointer(skb, k)) != NULL) {
+ A = ntohl(*(u32*)ptr);
+ continue;
+ }
+ }
+ return 0;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ k = fentry->k;
+load_h:
+ if(k + sizeof(u16) <= len) {
+ A = ntohs(*(u16*)&data[k]);
+ continue;
+ }
+ if (k<0) {
+ u8 *ptr;
+
+ if (k>=SKF_AD_OFF)
+ break;
+ if ((ptr = load_pointer(skb, k)) != NULL) {
+ A = ntohs(*(u16*)ptr);
+ continue;
+ }
+ }
+ return 0;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ k = fentry->k;
+load_b:
+ if(k < len) {
+ A = data[k];
+ continue;
+ }
+ if (k<0) {
+ u8 *ptr;
+
+ if (k>=SKF_AD_OFF)
+ break;
+ if ((ptr = load_pointer(skb, k)) != NULL) {
+ A = *ptr;
+ continue;
+ }
+ }
+ return 0;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ A = len;
+ continue;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ X = len;
+ continue;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ k = X + fentry->k;
+ goto load_w;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ k = X + fentry->k;
+ goto load_h;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ k = X + fentry->k;
+ goto load_b;
+
+ case BPF_LDX|BPF_B|BPF_MSH:
+ k = fentry->k;
+ if(k >= len)
+ return (0);
+ X = (data[k] & 0xf) << 2;
+ continue;
+
+ case BPF_LD|BPF_IMM:
+ A = fentry->k;
+ continue;
+
+ case BPF_LDX|BPF_IMM:
+ X = fentry->k;
+ continue;
+
+ case BPF_LD|BPF_MEM:
+ A = mem[fentry->k];
+ continue;
+
+ case BPF_LDX|BPF_MEM:
+ X = mem[fentry->k];
+ continue;
+
+ case BPF_MISC|BPF_TAX:
+ X = A;
+ continue;
+
+ case BPF_MISC|BPF_TXA:
+ A = X;
+ continue;
+
+ case BPF_RET|BPF_K:
+ return ((unsigned int)fentry->k);
+
+ case BPF_RET|BPF_A:
+ return ((unsigned int)A);
+
+ case BPF_ST:
+ mem[fentry->k] = A;
+ continue;
+
+ case BPF_STX:
+ mem[fentry->k] = X;
+ continue;
+
+ default:
+ /* Invalid instruction counts as RET */
+ return (0);
+ }
+
+ /* Handle ancillary data, which is impossible
+ (or very difficult) to obtain by parsing packet contents.
+ */
+ switch (k-SKF_AD_OFF) {
+ case SKF_AD_PROTOCOL:
+ A = htons(skb->protocol);
+ continue;
+ case SKF_AD_PKTTYPE:
+ A = skb->pkt_type;
+ continue;
+ case SKF_AD_IFINDEX:
+ A = skb->dev->ifindex;
+ continue;
+ default:
+ return 0;
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through, kaboom!
+ */
+
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+ struct sock_filter *ftest;
+ int pc;
+
+ /*
+ * Check the filter code now.
+ */
+ for(pc = 0; pc < flen; pc++)
+ {
+ /*
+ * All jumps are forward as they are not signed
+ */
+
+ ftest = &filter[pc];
+ if(BPF_CLASS(ftest->code) == BPF_JMP)
+ {
+ /*
+ * But they mustn't jump off the end.
+ */
+ if(BPF_OP(ftest->code) == BPF_JA)
+ {
+ /* Note: a large ftest->k might cause
+ loops. Compare this with the conditional
+ jumps below, where offsets are limited. --ANK (981016)
+ */
+ if (ftest->k >= (unsigned)(flen-pc-1))
+ return (-EINVAL);
+ }
+ else
+ {
+ /*
+ * For conditionals both must be safe
+ */
+ if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen)
+ return (-EINVAL);
+ }
+ }
+
+ /*
+ * Check that memory operations use valid addresses.
+ */
+
+ if (ftest->k >= BPF_MEMWORDS)
+ {
+ /*
+ * But it might not be a memory operation...
+ */
+ switch (ftest->code) {
+ case BPF_ST:
+ case BPF_STX:
+ case BPF_LD|BPF_MEM:
+ case BPF_LDX|BPF_MEM:
+ return -EINVAL;
+ }
+ }
+ }
+
+ /*
+ * The program must end with a return. We don't care where they
+ * jumped within the script (it's always forwards) but in the
+ * end they _will_ hit this.
+ */
+
+ return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
+}
+
+/*
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later.
+ */
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct sk_filter *fp;
+ unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+ int err;
+
+ /* Make sure the new filter is present and of a sane size. */
+ if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+ return (-EINVAL);
+
+ fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+ if(fp == NULL)
+ return (-ENOMEM);
+
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+ sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+ return -EFAULT;
+ }
+
+ atomic_set(&fp->refcnt, 1);
+ fp->len = fprog->len;
+
+ if ((err = sk_chk_filter(fp->insns, fp->len))==0) {
+ struct sk_filter *old_fp = sk->filter;
+ sk->filter = fp;
+ synchronize_bh();
+ fp = old_fp;
+ }
+
+ if (fp)
+ sk_filter_release(sk, fp);
+
+ return (err);
+}
+#endif /* CONFIG_FILTER */
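
User programs reach sk_attach_filter() above through the SO_ATTACH_FILTER
socket option; the kernel copies the instruction array in and vets it with
sk_chk_filter() before installing it. A hedged example of such a program:
accept only IPv4/UDP frames and drop everything else, assuming plain
Ethernet framing (no VLAN tag) so the EtherType sits at offset 12 and the
IP protocol byte at offset 23:

#include <sys/socket.h>
#include <linux/filter.h>

static struct sock_filter udp_only[] = {
        { 0x28, 0, 0, 12 },     /* ldh [12]            ; load EtherType  */
        { 0x15, 0, 3, 0x0800 }, /* jeq #0x0800         ; IPv4? else drop */
        { 0x30, 0, 0, 23 },     /* ldb [23]            ; IP protocol     */
        { 0x15, 0, 1, 17 },     /* jeq #17             ; UDP? else drop  */
        { 0x06, 0, 0, 0xffff }, /* ret #0xffff         ; accept          */
        { 0x06, 0, 0, 0 },      /* ret #0              ; drop            */
};

int attach_udp_filter(int fd)
{
        struct sock_fprog prog;

        prog.len = sizeof(udp_only) / sizeof(udp_only[0]);
        prog.filter = udp_only;
        /* The kernel copies the program and runs sk_chk_filter() on it. */
        return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                          &prog, sizeof(prog));
}

The conditional jump offsets (jt/jf) are relative to the next instruction,
which is why sk_chk_filter() can insist that every jump moves forward.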
diff --git a/pfinet/linux-src/net/core/firewall.c b/pfinet/linux-src/net/core/firewall.c
new file mode 100644
index 00000000..fc7b1a51
--- /dev/null
+++ b/pfinet/linux-src/net/core/firewall.c
@@ -0,0 +1,160 @@
+/*
+ * Generic loadable firewalls. At the moment only IP will actually
+ * use these, but people can add the others as they are needed.
+ *
+ * Authors: Dave Bonn (for IP)
+ * much hacked by: Alan Cox
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/firewall.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/semaphore.h>
+
+struct semaphore firewall_sem = MUTEX;
+static int firewall_policy[NPROTO];
+static struct firewall_ops *firewall_chain[NPROTO];
+
+/*
+ * Register a firewall
+ */
+
+int register_firewall(int pf, struct firewall_ops *fw)
+{
+ struct firewall_ops **p;
+
+ if(pf<0||pf>=NPROTO)
+ return -EINVAL;
+
+ /*
+ * Don't allow two people to adjust at once.
+ */
+
+ down(&firewall_sem);
+
+ p=&firewall_chain[pf];
+
+ while(*p)
+ {
+ if(fw->fw_priority > (*p)->fw_priority)
+ break;
+ p=&((*p)->next);
+ }
+
+ /*
+ * We need to use a memory barrier to make sure that this
+ * works correctly even in SMP with weakly ordered writes.
+ *
+ * This is atomic wrt interrupts (and generally walking the
+ * chain), but not wrt itself (so you can't call this from
+ * an interrupt. Not that you'd want to).
+ */
+
+ fw->next=*p;
+ mb();
+ *p = fw;
+
+ /*
+ * And release the sleep lock
+ */
+
+ up(&firewall_sem);
+ return 0;
+}
+
+/*
+ * Unregister a firewall
+ */
+
+int unregister_firewall(int pf, struct firewall_ops *fw)
+{
+ struct firewall_ops **nl;
+
+ if(pf<0||pf>=NPROTO)
+ return -EINVAL;
+
+ /*
+ * Don't allow two people to adjust at once.
+ */
+
+ down(&firewall_sem);
+
+ nl=&firewall_chain[pf];
+
+ while(*nl!=NULL)
+ {
+ if(*nl==fw)
+ {
+ struct firewall_ops *f=fw->next;
+ *nl = f;
+ up(&firewall_sem);
+ synchronize_bh();
+ return 0;
+ }
+ nl=&((*nl)->next);
+ }
+ up(&firewall_sem);
+ return -ENOENT;
+}
+
+int call_fw_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
+{
+ struct firewall_ops *fw=firewall_chain[pf];
+
+ while(fw!=NULL)
+ {
+ int rc=fw->fw_forward(fw,pf,dev,phdr,arg,skb);
+ if(rc!=FW_SKIP)
+ return rc;
+ fw=fw->next;
+ }
+ return firewall_policy[pf];
+}
+
+/*
+ * Actual invocation of the chains
+ */
+
+int call_in_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
+{
+ struct firewall_ops *fw=firewall_chain[pf];
+
+ while(fw!=NULL)
+ {
+ int rc=fw->fw_input(fw,pf,dev,phdr,arg,skb);
+ if(rc!=FW_SKIP)
+ return rc;
+ fw=fw->next;
+ }
+ return firewall_policy[pf];
+}
+
+int call_out_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
+{
+ struct firewall_ops *fw=firewall_chain[pf];
+
+ while(fw!=NULL)
+ {
+ int rc=fw->fw_output(fw,pf,dev,phdr,arg,skb);
+ if(rc!=FW_SKIP)
+ return rc;
+ fw=fw->next;
+ }
+ /* alan, is this right? */
+ return firewall_policy[pf];
+}
+
+EXPORT_SYMBOL(register_firewall);
+EXPORT_SYMBOL(unregister_firewall);
+EXPORT_SYMBOL(call_in_firewall);
+EXPORT_SYMBOL(call_out_firewall);
+EXPORT_SYMBOL(call_fw_firewall);
+
+__initfunc(void fwchain_init(void))
+{
+ int i;
+ for(i=0;i<NPROTO;i++)
+ firewall_policy[i]=FW_ACCEPT;
+}
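
register_firewall() above keeps each chain sorted by descending
fw_priority, and the call_*_firewall() walkers stop at the first hook that
returns anything other than FW_SKIP, falling back to the per-family policy.
A minimal sketch of that dispatch; rule, insert_rule() and run_chain() are
illustrative names:

enum verdict { V_SKIP, V_ACCEPT, V_REJECT };

struct rule {
        struct rule *next;
        int priority;
        enum verdict (*hook)(const void *pkt);
};

/* Insert so the list stays sorted by descending priority,
   exactly like register_firewall(). */
static void insert_rule(struct rule **chain, struct rule *r)
{
        struct rule **p = chain;

        while (*p && r->priority <= (*p)->priority)
                p = &(*p)->next;
        r->next = *p;
        *p = r;
}

/* The first rule that does not say "skip" wins; otherwise the
   chain's default policy applies, like firewall_policy[pf]. */
static enum verdict run_chain(const struct rule *chain, const void *pkt,
                              enum verdict policy)
{
        const struct rule *r;

        for (r = chain; r; r = r->next) {
                enum verdict v = r->hook(pkt);
                if (v != V_SKIP)
                        return v;
        }
        return policy;
}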
diff --git a/pfinet/linux-src/net/core/iovec.c b/pfinet/linux-src/net/core/iovec.c
new file mode 100644
index 00000000..c20f8530
--- /dev/null
+++ b/pfinet/linux-src/net/core/iovec.c
@@ -0,0 +1,278 @@
+/*
+ * iovec manipulation routines.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ * Andrew Lunn : Errors in iovec copying.
+ * Pedro Roque : Added memcpy_fromiovecend and
+ * csum_..._fromiovecend.
+ * Andi Kleen : fixed error handling for 2.1
+ * Alexey Kuznetsov: 2.1 optimisations
+ * Andi Kleen : Fix csum*fromiovecend for IPv6.
+ */
+
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+
+/*
+ * Verify iovec. The caller must ensure that the iovec is big enough
+ * to hold the message iovec.
+ *
+ * Save time not doing verify_area. copy_*_user will make this work
+ * in any case.
+ */
+
+int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+{
+ int size, err, ct;
+
+ if(m->msg_namelen)
+ {
+ if(mode==VERIFY_READ)
+ {
+ err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
+ if(err<0)
+ goto out;
+ }
+
+ m->msg_name = address;
+ } else
+ m->msg_name = NULL;
+
+ err = -EFAULT;
+ size = m->msg_iovlen * sizeof(struct iovec);
+ if (copy_from_user(iov, m->msg_iov, size))
+ goto out;
+ m->msg_iov=iov;
+
+ for (err = 0, ct = 0; ct < m->msg_iovlen; ct++) {
+ err += iov[ct].iov_len;
+ /* The goal is not to verify user data, but to prevent returning
+ a negative value, which would be interpreted as an errno.
+ Overflow is still possible, but it is harmless.
+ */
+ if (err < 0)
+ return -EMSGSIZE;
+ }
+out:
+ return err;
+}
+
+/*
+ * Copy kernel to iovec. Returns -EFAULT on error.
+ *
+ * Note: this modifies the original iovec.
+ */
+
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+ int err = -EFAULT;
+
+ while(len>0)
+ {
+ if(iov->iov_len)
+ {
+ int copy = min(iov->iov_len, len);
+ if (copy_to_user(iov->iov_base, kdata, copy))
+ goto out;
+ kdata+=copy;
+ len-=copy;
+ iov->iov_len-=copy;
+ iov->iov_base+=copy;
+ }
+ iov++;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+/*
+ * In-kernel copy to iovec. Cannot fail: memcpy is used, not copy_to_user.
+ *
+ * Note: this modifies the original iovec.
+ */
+
+void memcpy_tokerneliovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+ while(len>0)
+ {
+ if(iov->iov_len)
+ {
+ int copy = min(iov->iov_len, len);
+ memcpy(iov->iov_base, kdata, copy);
+ kdata+=copy;
+ len-=copy;
+ iov->iov_len-=copy;
+ iov->iov_base+=copy;
+ }
+ iov++;
+ }
+}
+
+
+/*
+ * Copy iovec to kernel. Returns -EFAULT on error.
+ *
+ * Note: this modifies the original iovec.
+ */
+
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+ int err = -EFAULT;
+
+ while(len>0)
+ {
+ if(iov->iov_len)
+ {
+ int copy = min(len, iov->iov_len);
+ if (copy_from_user(kdata, iov->iov_base, copy))
+ goto out;
+ len-=copy;
+ kdata+=copy;
+ iov->iov_base+=copy;
+ iov->iov_len-=copy;
+ }
+ iov++;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+
+/*
+ * For use with ip_build_xmit
+ */
+
+int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
+ int len)
+{
+ int err = -EFAULT;
+
+ /* Skip over the finished iovecs */
+ while(offset >= iov->iov_len)
+ {
+ offset -= iov->iov_len;
+ iov++;
+ }
+
+ while (len > 0)
+ {
+ u8 *base = iov->iov_base + offset;
+ int copy = min(len, iov->iov_len - offset);
+
+ offset = 0;
+ if (copy_from_user(kdata, base, copy))
+ goto out;
+ len -= copy;
+ kdata += copy;
+ iov++;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+/*
+ * And now for the all-in-one: copy and checksum from a user iovec
+ * directly to a datagram.
+ * All calls to csum_partial but the last must be in 32 bit chunks.
+ *
+ * ip_build_xmit must ensure that when fragmenting only the last
+ * call to this function will be unaligned also.
+ */
+
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+ int offset, unsigned int len, int *csump)
+{
+ int csum = *csump;
+ int partial_cnt = 0, err = 0;
+
+ /* Skip over the finished iovecs */
+ while (offset >= iov->iov_len)
+ {
+ offset -= iov->iov_len;
+ iov++;
+ }
+
+ while (len > 0)
+ {
+ u8 *base = iov->iov_base + offset;
+ unsigned int copy = min(len, iov->iov_len - offset);
+
+ offset = 0;
+ /* There is a remnant from previous iov. */
+ if (partial_cnt)
+ {
+ int par_len = 4 - partial_cnt;
+
+ /* iov component is too short ... */
+ if (par_len > copy) {
+ if (copy_from_user(kdata, base, copy))
+ goto out_fault;
+ kdata += copy;
+ base += copy;
+ partial_cnt += copy;
+ len -= copy;
+ iov++;
+ if (len)
+ continue;
+ *csump = csum_partial(kdata - partial_cnt,
+ partial_cnt, csum);
+ goto out;
+ }
+ if (copy_from_user(kdata, base, par_len))
+ goto out_fault;
+ csum = csum_partial(kdata - partial_cnt, 4, csum);
+ kdata += par_len;
+ base += par_len;
+ copy -= par_len;
+ len -= par_len;
+ partial_cnt = 0;
+ }
+
+ if (len > copy)
+ {
+ partial_cnt = copy % 4;
+ if (partial_cnt)
+ {
+ copy -= partial_cnt;
+ if (copy_from_user(kdata + copy, base + copy,
+ partial_cnt))
+ goto out_fault;
+ }
+ }
+
+ if (copy) {
+ csum = csum_and_copy_from_user(base, kdata, copy,
+ csum, &err);
+ if (err)
+ goto out;
+ }
+ len -= copy + partial_cnt;
+ kdata += copy + partial_cnt;
+ iov++;
+ }
+ *csump = csum;
+out:
+ return err;
+
+out_fault:
+ err = -EFAULT;
+ goto out;
+}
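
memcpy_fromiovecend() above first consumes the iovec elements wholly
covered by the starting offset, then copies across element boundaries
until len is exhausted. A user-space sketch of the same walk, with plain
memcpy() standing in for copy_from_user() and assuming, as the kernel
code does, that the caller guarantees offset+len fits inside the array:

#include <string.h>
#include <sys/uio.h>

static void copy_from_iovec_end(unsigned char *kdata,
                                const struct iovec *iov, size_t offset,
                                size_t len)
{
        /* Skip over the finished iovecs. */
        while (offset >= iov->iov_len) {
                offset -= iov->iov_len;
                iov++;
        }

        while (len > 0) {
                const unsigned char *base =
                        (const unsigned char *)iov->iov_base + offset;
                size_t copy = iov->iov_len - offset;

                if (copy > len)
                        copy = len;
                offset = 0;             /* only the first element is partial */
                memcpy(kdata, base, copy);
                len -= copy;
                kdata += copy;
                iov++;
        }
}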
diff --git a/pfinet/linux-src/net/core/neighbour.c b/pfinet/linux-src/net/core/neighbour.c
new file mode 100644
index 00000000..6afbfdcc
--- /dev/null
+++ b/pfinet/linux-src/net/core/neighbour.c
@@ -0,0 +1,1394 @@
+/*
+ * Generic address resolution entity
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+
+/*
+ NOTE. The most unpleasant question is serialization of
+ accesses to resolved addresses. The problem is that addresses
+ are modified by bh, but they are referenced from normal
+ kernel threads. Until today no locking was done.
+ My reasoning was that a corrupted address token would be copied
+ to a packet with cosmologically small probability
+ (it is even difficult to estimate such a small number)
+ and it is very silly to waste cycles in the fast path locking them.
+
+ But now I have changed my mind, though not because the previous
+ statement is wrong. Actually, neigh->ha MAY BE not an opaque byte
+ array, but a reference to some private data. In that case even a
+ negligible corruption probability becomes a bug.
+
+ - hh cache is protected by rwlock. It assumes that
+ hh cache update procedure is short and fast, and that
+ read_lock is cheaper than start_bh_atomic().
+ - ha tokens, saved in neighbour entries, are protected
+ by bh_atomic().
+ - no protection is made in /proc reading. It is OK, because
+ /proc is broken by design in any case, and
+ corrupted output is normal behaviour there.
+
+ --ANK (981025)
+ */
+
+#define NEIGH_DEBUG 1
+
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
+
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
+
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
+static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev);
+
+static int neigh_glbl_allocs;
+static struct neigh_table *neigh_tables;
+
+static int neigh_blackhole(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+/*
+ * It is a random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to the default IPv6 settings and is not overridable,
+ * because it is a really reasonable choice.
+ */
+
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+ return (net_random() % base) + (base>>1);
+}
+
+
+static int neigh_forced_gc(struct neigh_table *tbl)
+{
+ int shrunk = 0;
+ int i;
+
+ if (atomic_read(&tbl->lock))
+ return 0;
+
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ /* Neighbour record may be discarded if:
+ - nobody refers to it.
+ - it is not permanent
+ - (NEW and probably wrong)
+ INCOMPLETE entries are kept at least for
+ n->parms->retrans_time, otherwise we could
+ flood the network with resolution requests.
+ It is not clear what is better: table overflow
+ or flooding.
+ */
+ if (atomic_read(&n->refcnt) == 0 &&
+ !(n->nud_state&NUD_PERMANENT) &&
+ (n->nud_state != NUD_INCOMPLETE ||
+ jiffies - n->used > n->parms->retrans_time)) {
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ shrunk = 1;
+ neigh_destroy(n);
+ continue;
+ }
+ np = &n->next;
+ }
+ }
+
+ tbl->last_flush = jiffies;
+ return shrunk;
+}
+
+int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
+{
+ int i;
+
+ if (atomic_read(&tbl->lock)) {
+ NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n");
+ return -EBUSY;
+ }
+
+ start_bh_atomic();
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ if (dev && n->dev != dev) {
+ np = &n->next;
+ continue;
+ }
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ if (atomic_read(&n->refcnt)) {
+ /* The most unpleasant situation.
+ We must destroy neighbour entry,
+ but someone still uses it.
+
+ The destroy will be delayed until
+ the last user releases us, but
+ we must kill timers etc. and move
+ it to safe state.
+ */
+ if (n->nud_state & NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->parms = &tbl->parms;
+ skb_queue_purge(&n->arp_queue);
+ n->output = neigh_blackhole;
+ if (n->nud_state&NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ NEIGH_PRINTK2("neigh %p is stray.\n", n);
+ } else
+ neigh_destroy(n);
+ }
+ }
+
+ del_timer(&tbl->proxy_timer);
+ skb_queue_purge(&tbl->proxy_queue);
+ pneigh_ifdown(tbl, dev);
+ end_bh_atomic();
+ return 0;
+}
+
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
+{
+ struct neighbour *n;
+ unsigned long now = jiffies;
+
+ if (tbl->entries > tbl->gc_thresh1) {
+ if (creat < 0)
+ return NULL;
+ if (tbl->entries > tbl->gc_thresh3 ||
+ (tbl->entries > tbl->gc_thresh2 &&
+ now - tbl->last_flush > 5*HZ)) {
+ if (neigh_forced_gc(tbl) == 0 &&
+ tbl->entries > tbl->gc_thresh3)
+ return NULL;
+ }
+ }
+
+ n = kmalloc(tbl->entry_size, GFP_ATOMIC);
+ if (n == NULL)
+ return NULL;
+
+ memset(n, 0, tbl->entry_size);
+
+ skb_queue_head_init(&n->arp_queue);
+ n->updated = n->used = now;
+ n->nud_state = NUD_NONE;
+ n->output = neigh_blackhole;
+ n->parms = &tbl->parms;
+ init_timer(&n->timer);
+ n->timer.function = neigh_timer_handler;
+ n->timer.data = (unsigned long)n;
+ tbl->stats.allocs++;
+ neigh_glbl_allocs++;
+ return n;
+}
+
+
+struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
+{
+ struct neighbour *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>3;
+ hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
+
+ for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+ if (dev == n->dev &&
+ memcmp(n->primary_key, pkey, key_len) == 0) {
+ atomic_inc(&n->refcnt);
+ return n;
+ }
+ }
+ if (!creat)
+ return NULL;
+
+ n = neigh_alloc(tbl, creat);
+ if (n == NULL)
+ return NULL;
+
+ memcpy(n->primary_key, pkey, key_len);
+ n->dev = dev;
+
+ /* Protocol specific setup. */
+ if (tbl->constructor && tbl->constructor(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
+ }
+
+ /* Device specific setup. */
+ if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
+ }
+
+ n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
+ atomic_set(&n->refcnt, 1);
+ tbl->entries++;
+ n->next = tbl->hash_buckets[hash_val];
+ tbl->hash_buckets[hash_val] = n;
+ n->tbl = tbl;
+ NEIGH_PRINTK2("neigh %p is created.\n", n);
+ return n;
+}
+
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
+{
+ struct pneigh_entry *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
+
+ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 &&
+ (n->dev == dev || !n->dev))
+ return n;
+ }
+ if (!creat)
+ return NULL;
+
+ n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (n == NULL)
+ return NULL;
+
+ memcpy(n->key, pkey, key_len);
+ n->dev = dev;
+
+ if (tbl->pconstructor && tbl->pconstructor(n)) {
+ kfree(n);
+ return NULL;
+ }
+
+ n->next = tbl->phash_buckets[hash_val];
+ tbl->phash_buckets[hash_val] = n;
+ return n;
+}
+
+
+int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
+{
+ struct pneigh_entry *n, **np;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
+
+ for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
+ *np = n->next;
+ synchronize_bh();
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ kfree(n);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev)
+{
+ struct pneigh_entry *n, **np;
+ u32 h;
+
+ for (h=0; h<=PNEIGH_HASHMASK; h++) {
+ np = &tbl->phash_buckets[h];
+ while ((n=*np) != NULL) {
+ if (n->dev == dev || dev == NULL) {
+ *np = n->next;
+ synchronize_bh();
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ kfree(n);
+ continue;
+ }
+ np = &n->next;
+ }
+ }
+ return -ENOENT;
+}
+
+
+/*
+ * The neighbour must already be out of the table.
+ *
+ */
+void neigh_destroy(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+
+ if (neigh->tbl || atomic_read(&neigh->refcnt)) {
+ NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: "
+ "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0));
+ return;
+ }
+
+ if (neigh->nud_state&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+
+ while ((hh = neigh->hh) != NULL) {
+ neigh->hh = hh->hh_next;
+ hh->hh_next = NULL;
+ hh->hh_output = neigh_blackhole;
+ if (atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
+ }
+
+ if (neigh->ops && neigh->ops->destructor)
+ (neigh->ops->destructor)(neigh);
+
+ skb_queue_purge(&neigh->arp_queue);
+
+ NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+
+ neigh_glbl_allocs--;
+ kfree(neigh);
+}
+
+/* Neighbour state is suspicious;
+ disable fast path.
+ */
+static void neigh_suspect(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+
+ NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh);
+
+ neigh->output = neigh->ops->output;
+
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+ enable fast path.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+
+ NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+ neigh->output = neigh->ops->connected_output;
+
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->hh_output;
+}
+
+/*
+ Transitions NUD_STALE <-> NUD_REACHABLE do not occur
+ when the fast path is built: we have no timers associated with
+ these states, and no time to check state when sending.
+ neigh_periodic_timer periodically checks the neigh->confirmed
+ time and moves NUD_REACHABLE -> NUD_STALE.
+
+ If a routine wants to know the TRUE entry state, it calls
+ neigh_sync before checking state.
+ */
+
+static void neigh_sync(struct neighbour *n)
+{
+ unsigned long now = jiffies;
+ u8 state = n->nud_state;
+
+ if (state&(NUD_NOARP|NUD_PERMANENT))
+ return;
+ if (state&NUD_REACHABLE) {
+ if (now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+ } else if (state&NUD_VALID) {
+ if (now - n->confirmed < n->parms->reachable_time) {
+ if (state&NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->nud_state = NUD_REACHABLE;
+ neigh_connect(n);
+ }
+ }
+}
+
+static void neigh_periodic_timer(unsigned long arg)
+{
+ struct neigh_table *tbl = (struct neigh_table*)arg;
+ unsigned long now = jiffies;
+ int i;
+
+ if (atomic_read(&tbl->lock)) {
+ tbl->gc_timer.expires = now + 1*HZ;
+ add_timer(&tbl->gc_timer);
+ return;
+ }
+
+ /*
+ * periodically recompute ReachableTime from the random function
+ */
+
+ if (now - tbl->last_rand > 300*HZ) {
+ struct neigh_parms *p;
+ tbl->last_rand = now;
+ for (p=&tbl->parms; p; p = p->next)
+ p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+ }
+
+ for (i=0; i <= NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ unsigned state = n->nud_state;
+
+ if (state&(NUD_PERMANENT|NUD_IN_TIMER))
+ goto next_elt;
+
+ if ((long)(n->used - n->confirmed) < 0)
+ n->used = n->confirmed;
+
+ if (atomic_read(&n->refcnt) == 0 &&
+ (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
+ *np = n->next;
+ n->tbl = NULL;
+ n->next = NULL;
+ tbl->entries--;
+ neigh_destroy(n);
+ continue;
+ }
+
+ if (n->nud_state&NUD_REACHABLE &&
+ now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+
+next_elt:
+ np = &n->next;
+ }
+ }
+
+ tbl->gc_timer.expires = now + tbl->gc_interval;
+ add_timer(&tbl->gc_timer);
+}
+
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+ struct neigh_parms *p = n->parms;
+ return p->ucast_probes + p->app_probes + p->mcast_probes;
+}
+
+
+/* Called when a timer expires for a neighbour entry. */
+
+static void neigh_timer_handler(unsigned long arg)
+{
+ unsigned long now = jiffies;
+ struct neighbour *neigh = (struct neighbour*)arg;
+ unsigned state = neigh->nud_state;
+
+ if (!(state&NUD_IN_TIMER)) {
+ NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n");
+ return;
+ }
+
+ if ((state&NUD_VALID) &&
+ now - neigh->confirmed < neigh->parms->reachable_time) {
+ neigh->nud_state = NUD_REACHABLE;
+ NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+ neigh_connect(neigh);
+ return;
+ }
+ if (state == NUD_DELAY) {
+ NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+ neigh->nud_state = NUD_PROBE;
+ neigh->probes = 0;
+ }
+
+ if (neigh->probes >= neigh_max_probes(neigh)) {
+ struct sk_buff *skb;
+
+ neigh->nud_state = NUD_FAILED;
+ neigh->tbl->stats.res_failed++;
+ NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+ /* This is a very delicate place. report_unreachable is a very
+ complicated routine. In particular, it can hit the same
+ neighbour entry!
+
+ So we try to be careful and avoid a dead loop. --ANK
+ */
+ while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+ neigh->ops->error_report(neigh, skb);
+ skb_queue_purge(&neigh->arp_queue);
+ return;
+ }
+
+ neigh->timer.expires = now + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
+
+ neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
+ neigh->probes++;
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+ start_bh_atomic();
+ if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
+ if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
+ if (neigh->tbl == NULL) {
+ NEIGH_PRINTK2("neigh %p used after death.\n", neigh);
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ neigh->probes = neigh->parms->ucast_probes;
+ neigh->nud_state = NUD_INCOMPLETE;
+ neigh->timer.expires = jiffies + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
+
+ neigh->ops->solicit(neigh, skb);
+ neigh->probes++;
+ } else {
+ neigh->nud_state = NUD_FAILED;
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ }
+ if (neigh->nud_state == NUD_INCOMPLETE) {
+ if (skb) {
+ if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
+ struct sk_buff *buff;
+ buff = neigh->arp_queue.prev;
+ __skb_unlink(buff, &neigh->arp_queue);
+ kfree_skb(buff);
+ }
+ __skb_queue_head(&neigh->arp_queue, skb);
+ }
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->nud_state == NUD_STALE) {
+ NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh->nud_state = NUD_DELAY;
+ neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+ add_timer(&neigh->timer);
+ }
+ }
+ end_bh_atomic();
+ return 0;
+}
+
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+ void (*update)(struct hh_cache*, struct device*, unsigned char*) =
+ neigh->dev->header_cache_update;
+
+ if (update) {
+ for (hh=neigh->hh; hh; hh=hh->hh_next) {
+ write_lock_irq(&hh->hh_lock);
+ update(hh, neigh->dev, neigh->ha);
+ write_unlock_irq(&hh->hh_lock);
+ }
+ }
+}
+
+
+
+/* Generic update routine.
+ -- lladdr is new lladdr or NULL, if it is not supplied.
+ -- new is new state.
+ -- override==1 allows overriding an existing lladdr, if it is different.
+ -- arp==0 means that the change is administrative.
+ */
+
+int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
+{
+ u8 old = neigh->nud_state;
+ struct device *dev = neigh->dev;
+
+ if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
+ return -EPERM;
+
+ if (!(new&NUD_VALID)) {
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ if (old&NUD_CONNECTED)
+ neigh_suspect(neigh);
+ neigh->nud_state = new;
+ return 0;
+ }
+
+ /* Compare new lladdr with cached one */
+ if (dev->addr_len == 0) {
+ /* First case: device needs no address. */
+ lladdr = neigh->ha;
+ } else if (lladdr) {
+ /* The second case: if something is already cached
+ and a new address is proposed:
+ - compare new & old
+ - if they are different, check override flag
+ */
+ if (old&NUD_VALID) {
+ if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
+ lladdr = neigh->ha;
+ else if (!override)
+ return -EPERM;
+ }
+ } else {
+ /* No address is supplied; if we know something,
+ use it, otherwise discard the request.
+ */
+ if (!(old&NUD_VALID))
+ return -EINVAL;
+ lladdr = neigh->ha;
+ }
+
+ neigh_sync(neigh);
+ old = neigh->nud_state;
+ if (new&NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+
+ /* If the entry was valid and the address has not changed,
+ do not change the entry state if the new one is STALE.
+ */
+ if (old&NUD_VALID) {
+ if (lladdr == neigh->ha)
+ if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
+ return 0;
+ }
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ neigh->nud_state = new;
+ if (lladdr != neigh->ha) {
+ memcpy(&neigh->ha, lladdr, dev->addr_len);
+ neigh_update_hhs(neigh);
+ neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
+#ifdef CONFIG_ARPD
+ if (neigh->parms->app_probes)
+ neigh_app_notify(neigh);
+#endif
+ }
+ if (new == old)
+ return 0;
+ if (new&NUD_CONNECTED)
+ neigh_connect(neigh);
+ else
+ neigh_suspect(neigh);
+ if (!(old&NUD_VALID)) {
+ struct sk_buff *skb;
+
+ /* Again: avoid dead loop if something went wrong */
+
+ while (neigh->nud_state&NUD_VALID &&
+ (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
+ struct neighbour *n1 = neigh;
+ /* On shaper/eql skb->dst->neighbour != neigh :( */
+ if (skb->dst && skb->dst->neighbour)
+ n1 = skb->dst->neighbour;
+ n1->output(skb);
+ }
+ skb_queue_purge(&neigh->arp_queue);
+ }
+ return 0;
+}
+
+struct neighbour * neigh_event_ns(struct neigh_table *tbl,
+ u8 *lladdr, void *saddr,
+ struct device *dev)
+{
+ struct neighbour *neigh;
+
+ neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
+ if (neigh)
+ neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
+ return neigh;
+}
+
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
+{
+ struct hh_cache *hh = NULL;
+ struct device *dev = dst->dev;
+
+ for (hh=n->hh; hh; hh = hh->hh_next)
+ if (hh->hh_type == protocol)
+ break;
+
+ if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+ memset(hh, 0, sizeof(struct hh_cache));
+ hh->hh_type = protocol;
+ atomic_set(&hh->hh_refcnt, 0);
+ hh->hh_next = NULL;
+ if (dev->hard_header_cache(n, hh)) {
+ kfree(hh);
+ hh = NULL;
+ } else {
+ atomic_inc(&hh->hh_refcnt);
+ hh->hh_next = n->hh;
+ n->hh = hh;
+ if (n->nud_state&NUD_CONNECTED)
+ hh->hh_output = n->ops->hh_output;
+ else
+ hh->hh_output = n->ops->output;
+ }
+ }
+ if (hh) {
+ atomic_inc(&hh->hh_refcnt);
+ dst->hh = hh;
+ }
+}
+
+/* This function can be used in contexts where only the old dev_queue_xmit
+ worked, e.g. if you want to override the normal output path (eql, shaper),
+ but resolution is not made yet.
+ */
+
+int neigh_compat_output(struct sk_buff *skb)
+{
+ struct device *dev = skb->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (dev->hard_header &&
+ dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
+ dev->rebuild_header(skb))
+ return 0;
+
+ return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct neighbour *neigh;
+
+ if (!dst || !(neigh = dst->neighbour))
+ goto discard;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (neigh_event_send(neigh, skb) == 0) {
+ int err;
+ struct device *dev = neigh->dev;
+ if (dev->hard_header_cache && dst->hh == NULL) {
+ start_bh_atomic();
+ if (dst->hh == NULL)
+ neigh_hh_init(neigh, dst, dst->ops->protocol);
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
+ end_bh_atomic();
+ } else {
+ start_bh_atomic();
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
+ end_bh_atomic();
+ }
+ if (err >= 0)
+ return neigh->ops->queue_xmit(skb);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ return 0;
+
+discard:
+ NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+ int err;
+ struct dst_entry *dst = skb->dst;
+ struct neighbour *neigh = dst->neighbour;
+ struct device *dev = neigh->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ start_bh_atomic();
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
+ end_bh_atomic();
+ if (err >= 0)
+ return neigh->ops->queue_xmit(skb);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static void neigh_proxy_process(unsigned long arg)
+{
+ struct neigh_table *tbl = (struct neigh_table *)arg;
+ long sched_next = 0;
+ unsigned long now = jiffies;
+ struct sk_buff *skb = tbl->proxy_queue.next;
+
+ while (skb != (struct sk_buff*)&tbl->proxy_queue) {
+ struct sk_buff *back = skb;
+ long tdif = back->stamp.tv_usec - now;
+
+ skb = skb->next;
+ if (tdif <= 0) {
+ __skb_unlink(back, &tbl->proxy_queue);
+ if (tbl->proxy_redo)
+ tbl->proxy_redo(back);
+ else
+ kfree_skb(back);
+ } else if (!sched_next || tdif < sched_next)
+ sched_next = tdif;
+ }
+ del_timer(&tbl->proxy_timer);
+ if (sched_next) {
+ tbl->proxy_timer.expires = jiffies + sched_next;
+ add_timer(&tbl->proxy_timer);
+ }
+}
+
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+ struct sk_buff *skb)
+{
+ unsigned long now = jiffies;
+ long sched_next = net_random()%p->proxy_delay;
+
+ if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+ kfree_skb(skb);
+ return;
+ }
+ skb->stamp.tv_sec = 0;
+ skb->stamp.tv_usec = now + sched_next;
+ if (del_timer(&tbl->proxy_timer)) {
+ long tval = tbl->proxy_timer.expires - now;
+ if (tval < sched_next)
+ sched_next = tval;
+ }
+ tbl->proxy_timer.expires = now + sched_next;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ __skb_queue_tail(&tbl->proxy_queue, skb);
+ add_timer(&tbl->proxy_timer);
+}
+
+
+struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl)
+{
+ struct neigh_parms *p;
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p) {
+ memcpy(p, &tbl->parms, sizeof(*p));
+ p->tbl = tbl;
+ p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+ if (dev && dev->neigh_setup) {
+ if (dev->neigh_setup(dev, p)) {
+ kfree(p);
+ return NULL;
+ }
+ }
+ p->next = tbl->parms.next;
+ tbl->parms.next = p;
+ }
+ return p;
+}
+
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+ struct neigh_parms **p;
+
+ if (parms == NULL || parms == &tbl->parms)
+ return;
+ for (p = &tbl->parms.next; *p; p = &(*p)->next) {
+ if (*p == parms) {
+ *p = parms->next;
+ synchronize_bh();
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(parms);
+#endif
+ kfree(parms);
+ return;
+ }
+ }
+ NEIGH_PRINTK1("neigh_release_parms: not found\n");
+}
+
+
+void neigh_table_init(struct neigh_table *tbl)
+{
+ unsigned long now = jiffies;
+
+ tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+ init_timer(&tbl->gc_timer);
+ tbl->gc_timer.data = (unsigned long)tbl;
+ tbl->gc_timer.function = neigh_periodic_timer;
+ tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
+ add_timer(&tbl->gc_timer);
+
+ init_timer(&tbl->proxy_timer);
+ tbl->proxy_timer.data = (unsigned long)tbl;
+ tbl->proxy_timer.function = neigh_proxy_process;
+ skb_queue_head_init(&tbl->proxy_queue);
+
+ tbl->last_flush = now;
+ tbl->last_rand = now + tbl->parms.reachable_time*20;
+ tbl->next = neigh_tables;
+ neigh_tables = tbl;
+}
+
+int neigh_table_clear(struct neigh_table *tbl)
+{
+ struct neigh_table **tp;
+
+ start_bh_atomic();
+ del_timer(&tbl->gc_timer);
+ del_timer(&tbl->proxy_timer);
+ skb_queue_purge(&tbl->proxy_queue);
+ neigh_ifdown(tbl, NULL);
+ end_bh_atomic();
+ if (tbl->entries)
+ printk(KERN_CRIT "neighbour leakage\n");
+ for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+ if (*tp == tbl) {
+ *tp = tbl->next;
+ synchronize_bh();
+ break;
+ }
+ }
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(&tbl->parms);
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_RTNETLINK
+
+
+int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+
+ if (ndm->ndm_flags&NTF_PROXY)
+ return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ start_bh_atomic();
+ n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
+ if (n) {
+ err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
+ neigh_release(n);
+ }
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+ if (ndm->ndm_flags&NTF_PROXY) {
+ if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
+ return 0;
+ return -ENOBUFS;
+ }
+ if (dev == NULL)
+ return -EINVAL;
+ if (nda[NDA_LLADDR-1] != NULL &&
+ nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
+ return -EINVAL;
+ start_bh_atomic();
+ n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
+ if (n) {
+ if (nlh->nlmsg_flags&NLM_F_EXCL)
+ err = -EEXIST;
+ } else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
+ err = -ENOENT;
+ else {
+ n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1);
+ if (n == NULL)
+ err = -ENOBUFS;
+ }
+ if (err == 0) {
+ err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
+ ndm->ndm_state,
+ nlh->nlmsg_flags&NLM_F_REPLACE, 0);
+ }
+ if (n)
+ neigh_release(n);
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+ u32 pid, u32 seq, int event)
+{
+ unsigned long now = jiffies;
+ struct ndmsg *ndm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+ struct nda_cacheinfo ci;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
+ ndm = NLMSG_DATA(nlh);
+ ndm->ndm_family = n->ops->family;
+ ndm->ndm_flags = n->flags;
+ ndm->ndm_type = n->type;
+ ndm->ndm_state = n->nud_state;
+ ndm->ndm_ifindex = n->dev->ifindex;
+ RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+ if (n->nud_state&NUD_VALID)
+ RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+ ci.ndm_used = now - n->used;
+ ci.ndm_confirmed = now - n->confirmed;
+ ci.ndm_updated = now - n->updated;
+ ci.ndm_refcnt = atomic_read(&n->refcnt);
+ RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct neighbour *n;
+ int h, s_h;
+ int idx, s_idx;
+
+ s_h = cb->args[1];
+ s_idx = idx = cb->args[2];
+ for (h=0; h <= NEIGH_HASHMASK; h++) {
+ if (h < s_h) continue;
+ if (h > s_h)
+ s_idx = 0;
+ start_bh_atomic();
+ for (n = tbl->hash_buckets[h], idx = 0; n;
+ n = n->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
+ end_bh_atomic();
+ cb->args[1] = h;
+ cb->args[2] = idx;
+ return -1;
+ }
+ }
+ end_bh_atomic();
+ }
+
+ cb->args[1] = h;
+ cb->args[2] = idx;
+ return skb->len;
+}
+
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int t;
+ int s_t;
+ struct neigh_table *tbl;
+ int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+ s_t = cb->args[0];
+
+ for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
+ if (t < s_t) continue;
+ if (family && tbl->family != family)
+ continue;
+ if (t > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+ if (neigh_dump_table(tbl, skb, cb) < 0)
+ break;
+ }
+
+ cb->args[0] = t;
+
+ return skb->len;
+}
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
+ }
+ nlh = (struct nlmsghdr*)skb->data;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+static void neigh_app_notify(struct neighbour *n)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
+ }
+ nlh = (struct nlmsghdr*)skb->data;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+
+
+#endif
+
+
+#endif
+
+#ifdef CONFIG_SYSCTL
+
+struct neigh_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table neigh_vars[17];
+ ctl_table neigh_dev[2];
+ ctl_table neigh_neigh_dir[2];
+ ctl_table neigh_proto_dir[2];
+ ctl_table neigh_root_dir[2];
+} neigh_sysctl_template = {
+ NULL,
+ {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_APP_SOLICIT, "app_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_RETRANS_TIME, "retrans_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_UNRES_QLEN, "unres_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_DELAY, "proxy_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_LOCKTIME, "locktime",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_INTERVAL, "gc_interval",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_THRESH1, "gc_thresh1",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH2, "gc_thresh2",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH3, "gc_thresh3",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {0}},
+
+ {{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
+ {{0, "neigh", NULL, 0, 0555, NULL},{0}},
+ {{0, NULL, NULL, 0, 0555, NULL},{0}},
+ {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
+};
+
+int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
+ int p_id, int pdev_id, char *p_name)
+{
+ struct neigh_sysctl_table *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (t == NULL)
+ return -ENOBUFS;
+ memcpy(t, &neigh_sysctl_template, sizeof(*t));
+ t->neigh_vars[0].data = &p->mcast_probes;
+ t->neigh_vars[1].data = &p->ucast_probes;
+ t->neigh_vars[2].data = &p->app_probes;
+ t->neigh_vars[3].data = &p->retrans_time;
+ t->neigh_vars[4].data = &p->base_reachable_time;
+ t->neigh_vars[5].data = &p->delay_probe_time;
+ t->neigh_vars[6].data = &p->gc_staletime;
+ t->neigh_vars[7].data = &p->queue_len;
+ t->neigh_vars[8].data = &p->proxy_qlen;
+ t->neigh_vars[9].data = &p->anycast_delay;
+ t->neigh_vars[10].data = &p->proxy_delay;
+ t->neigh_vars[11].data = &p->locktime;
+ if (dev) {
+ t->neigh_dev[0].procname = dev->name;
+ t->neigh_dev[0].ctl_name = dev->ifindex;
+ memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
+ } else {
+ t->neigh_vars[12].data = (int*)(p+1);
+ t->neigh_vars[13].data = (int*)(p+1) + 1;
+ t->neigh_vars[14].data = (int*)(p+1) + 2;
+ t->neigh_vars[15].data = (int*)(p+1) + 3;
+ }
+ t->neigh_neigh_dir[0].ctl_name = pdev_id;
+
+ t->neigh_proto_dir[0].procname = p_name;
+ t->neigh_proto_dir[0].ctl_name = p_id;
+
+ t->neigh_dev[0].child = t->neigh_vars;
+ t->neigh_neigh_dir[0].child = t->neigh_dev;
+ t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
+ t->neigh_root_dir[0].child = t->neigh_proto_dir;
+
+ t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
+ if (t->sysctl_header == NULL) {
+ kfree(t);
+ return -ENOBUFS;
+ }
+ p->sysctl_table = t;
+ return 0;
+}
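+
+/* Usage sketch (illustrative; not a call made in this file): a protocol
+ * registers its default parameters once, then again per device. The IPv4
+ * ARP code does roughly
+ *
+ *	neigh_sysctl_register(NULL, &arp_tbl.parms,
+ *			      NET_IPV4, NET_IPV4_NEIGH, "ipv4");
+ *
+ * which yields /proc/sys/net/ipv4/neigh/default/<var> entries.
+ */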
+
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+ unregister_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
+}
+
+#endif /* CONFIG_SYSCTL */
diff --git a/pfinet/linux-src/net/core/profile.c b/pfinet/linux-src/net/core/profile.c
new file mode 100644
index 00000000..fc7464b7
--- /dev/null
+++ b/pfinet/linux-src/net/core/profile.c
@@ -0,0 +1,305 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/inet.h>
+#include <net/checksum.h>
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <net/profile.h>
+
+#ifdef CONFIG_NET_PROFILE
+
+atomic_t net_profile_active;
+struct timeval net_profile_adjust;
+
+NET_PROFILE_DEFINE(total);
+
+struct net_profile_slot *net_profile_chain = &net_prof_total;
+
+#ifdef __alpha__
+__u32 alpha_lo;
+long alpha_hi;
+
+static void alpha_tick(unsigned long);
+
+static struct timer_list alpha_timer =
+ { NULL, NULL, 0, 0L, alpha_tick };
+
+void alpha_tick(unsigned long dummy)
+{
+ struct timeval dummy_stamp;
+ net_profile_stamp(&dummy_stamp);
+ alpha_timer.expires = jiffies + 4*HZ;
+ add_timer(&alpha_timer);
+}
+
+#endif
+
+void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved)
+{
+ struct net_profile_slot *s;
+
+ net_profile_sub(entered, leaved);
+ for (s = net_profile_chain; s; s = s->next) {
+ if (s->active)
+ net_profile_add(leaved, &s->irq);
+ }
+}
+
+
+#ifdef CONFIG_PROC_FS
+static int profile_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0;
+ off_t begin=0;
+ int len=0;
+ struct net_profile_slot *s;
+
+ len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n");
+
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 1;
+ atomic_inc(&net_profile_active);
+ NET_PROFILE_LEAVE(total);
+ sti();
+ }
+ for (s = net_profile_chain; s; s = s->next) {
+ struct net_profile_slot tmp;
+
+ cli();
+ tmp = *s;
+
+		/* Wrong, but pretty close to the truth */
+
+ s->accumulator.tv_sec = 0;
+ s->accumulator.tv_usec = 0;
+ s->irq.tv_sec = 0;
+ s->irq.tv_usec = 0;
+ s->hits = 0;
+ s->underflow = 0;
+		/* Repair the active count; it can be wrong here only if the code has a bug */
+ if (s->active) {
+ s->active = 0;
+ atomic_dec(&net_profile_active);
+ }
+ sti();
+
+ net_profile_sub(&tmp.irq, &tmp.accumulator);
+
+ len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d",
+ tmp.id,
+ tmp.hits,
+ tmp.accumulator.tv_sec,
+ tmp.accumulator.tv_usec,
+ tmp.irq.tv_sec,
+ tmp.irq.tv_usec,
+ tmp.underflow, tmp.active);
+
+ buffer[len++]='\n';
+
+ pos=begin+len;
+ if(pos<offset) {
+ len=0;
+ begin=pos;
+ }
+ if(pos>offset+length)
+ goto done;
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ if (len < 0) {
+ len = 0;
+ printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
+ }
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 0;
+ net_prof_total.hits = 0;
+ net_profile_stamp(&net_prof_total.entered);
+ sti();
+ }
+ return len;
+}
+#endif
+
+struct iphdr whitehole_iph;
+int whitehole_count;
+
+static int whitehole_xmit(struct sk_buff *skb, struct device *dev)
+{
+	struct net_device_stats *stats;
+
+	/* Update the counters before freeing: reading skb->len after
+	 * dev_kfree_skb() would be a use-after-free.
+	 */
+	stats = (struct net_device_stats *)dev->priv;
+	stats->tx_packets++;
+	stats->tx_bytes += skb->len;
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
+static void whitehole_inject(unsigned long);
+int whitehole_init(struct device *dev);
+
+static struct timer_list whitehole_timer =
+ { NULL, NULL, 0, 0L, whitehole_inject };
+
+static struct device whitehole_dev = {
+ "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
+
+static int whitehole_open(struct device *dev)
+{
+ whitehole_count = 100000;
+ whitehole_timer.expires = jiffies + 5*HZ;
+ add_timer(&whitehole_timer);
+ return 0;
+}
+
+static int whitehole_close(struct device *dev)
+{
+ del_timer(&whitehole_timer);
+ return 0;
+}
+
+static void whitehole_inject(unsigned long dummy)
+{
+ struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv;
+ extern int netdev_dropping;
+
+ do {
+ struct iphdr *iph;
+ struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
+ if (!skb)
+ break;
+ skb_reserve(skb, 32);
+ iph = (struct iphdr*)skb_put(skb, sizeof(*iph));
+ skb->mac.raw = ((u8*)iph) - 14;
+ memcpy(iph, &whitehole_iph, sizeof(*iph));
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->dev = &whitehole_dev;
+ skb->pkt_type = PACKET_HOST;
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+ netif_rx(skb);
+ whitehole_count--;
+ } while (netdev_dropping == 0 && whitehole_count>0);
+ if (whitehole_count > 0) {
+ whitehole_timer.expires = jiffies + 1;
+ add_timer(&whitehole_timer);
+ }
+}
+
+static struct net_device_stats *whitehole_get_stats(struct device *dev)
+{
+ struct net_device_stats *stats = (struct net_device_stats *) dev->priv;
+ return stats;
+}
+
+__initfunc(int whitehole_init(struct device *dev))
+{
+ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+ if (dev->priv == NULL)
+ return -ENOBUFS;
+ memset(dev->priv, 0, sizeof(struct net_device_stats));
+ dev->get_stats = whitehole_get_stats;
+ dev->hard_start_xmit = whitehole_xmit;
+ dev->open = whitehole_open;
+ dev->stop = whitehole_close;
+ ether_setup(dev);
+ dev->tx_queue_len = 0;
+ dev->flags |= IFF_NOARP;
+ dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST);
+ dev->iflink = 0;
+ whitehole_iph.ihl = 5;
+ whitehole_iph.version = 4;
+ whitehole_iph.ttl = 2;
+ whitehole_iph.saddr = in_aton("193.233.7.21");
+ whitehole_iph.daddr = in_aton("193.233.7.10");
+ whitehole_iph.tot_len = htons(20);
+ whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20);
+ return 0;
+}
+
+int net_profile_register(struct net_profile_slot *slot)
+{
+ cli();
+ slot->next = net_profile_chain;
+ net_profile_chain = slot;
+ sti();
+ return 0;
+}
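+
+/* Usage sketch (slot name illustrative): a subsystem defines a slot,
+ * registers it, and brackets the path it wants timed with the
+ * <net/profile.h> macros:
+ *
+ *	NET_PROFILE_DEFINE(my_path);
+ *	...
+ *	net_profile_register(&net_prof_my_path);
+ *	...
+ *	NET_PROFILE_ENTER(my_path);
+ *	... timed code ...
+ *	NET_PROFILE_LEAVE(my_path);
+ */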
+
+int net_profile_unregister(struct net_profile_slot *slot)
+{
+ struct net_profile_slot **sp, *s;
+
+ for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) {
+ if (s == slot) {
+ cli();
+ *sp = s->next;
+ sti();
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+
+__initfunc(int net_profile_init(void))
+{
+ int i;
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("net/profile", 0, 0);
+ ent->read_proc = profile_read_proc;
+#endif
+
+ register_netdevice(&whitehole_dev);
+
+ printk("Evaluating net profiler cost ...");
+#if CPU == 586 || CPU == 686
+ if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC)) {
+ printk(KERN_ERR "Sorry, your CPU does not support TSC. Net profiler disabled.\n");
+ return -1;
+ }
+#endif
+ start_bh_atomic();
+#ifdef __alpha__
+ alpha_tick(0);
+#endif
+ for (i=0; i<1024; i++) {
+ NET_PROFILE_ENTER(total);
+ NET_PROFILE_LEAVE(total);
+ }
+ if (net_prof_total.accumulator.tv_sec) {
+ printk(" too high!\n");
+ } else {
+ net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10;
+ printk("%ld units\n", net_profile_adjust.tv_usec);
+ }
+ net_prof_total.hits = 0;
+ net_profile_stamp(&net_prof_total.entered);
+ end_bh_atomic();
+ return 0;
+}
+
+#endif
diff --git a/pfinet/linux-src/net/core/rtnetlink.c b/pfinet/linux-src/net/core/rtnetlink.c
new file mode 100644
index 00000000..7f89e54a
--- /dev/null
+++ b/pfinet/linux-src/net/core/rtnetlink.c
@@ -0,0 +1,512 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Routing netlink socket interface: protocol independent part.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ * Vitaly E. Lavrov RTA_OK arithmetics was wrong.
+ * Alexey Zhuravlev ifi_change does something useful
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/string.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+atomic_t rtnl_rlockct;
+struct wait_queue *rtnl_wait;
+
+
+void rtnl_lock()
+{
+ rtnl_shlock();
+ rtnl_exlock();
+}
+
+void rtnl_unlock()
+{
+ rtnl_exunlock();
+ rtnl_shunlock();
+}
+
+int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
+{
+ memset(tb, 0, sizeof(struct rtattr*)*maxattr);
+
+ while (RTA_OK(rta, len)) {
+ unsigned flavor = rta->rta_type;
+ if (flavor && flavor <= maxattr)
+ tb[flavor-1] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+ return 0;
+}
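+
+/* Caller sketch (names illustrative): a message handler indexes its
+ * attributes once and then tests tb[TYPE-1] for presence; the 1-based
+ * attribute types map to 0-based slots:
+ *
+ *	struct rtattr *tb[RTA_MAX];
+ *
+ *	rtattr_parse(tb, RTA_MAX, RTM_RTA(rtm), len);
+ *	if (tb[RTA_DST-1])
+ *		memcpy(&daddr, RTA_DATA(tb[RTA_DST-1]), 4);
+ */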
+
+#ifdef CONFIG_RTNETLINK
+struct sock *rtnl;
+
+unsigned long rtnl_wlockct;
+
+struct rtnetlink_link * rtnetlink_links[NPROTO];
+
+#define _S 1 /* superuser privileges required */
+#define _X 2 /* exclusive access to tables required */
+#define _G 4 /* GET request */
+
+static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
+{
+ NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct ndmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg))
+};
+
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
+{
+ IFLA_MAX,
+ IFA_MAX,
+ RTA_MAX,
+ NDA_MAX,
+ RTA_MAX,
+ TCA_MAX,
+ TCA_MAX,
+ TCA_MAX
+};
+
+void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+ struct rtattr *rta;
+ int size = RTA_LENGTH(attrlen);
+
+ rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
+ rta->rta_type = attrtype;
+ rta->rta_len = size;
+ memcpy(RTA_DATA(rta), data, attrlen);
+}
+
+int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+{
+ int err = 0;
+
+ NETLINK_CB(skb).dst_groups = group;
+ if (echo)
+ atomic_inc(&skb->users);
+ netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+ if (echo)
+ err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+ return err;
+}
+
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
+ int type, u32 pid, u32 seq, u32 change)
+{
+ struct ifinfomsg *r;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
+ if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+ r = NLMSG_DATA(nlh);
+ r->ifi_family = AF_UNSPEC;
+ r->ifi_type = dev->type;
+ r->ifi_index = dev->ifindex;
+ r->ifi_flags = dev->flags;
+ r->ifi_change = change;
+
+ RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+ if (dev->addr_len) {
+ RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+ }
+ if (1) {
+ unsigned mtu = dev->mtu;
+ RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+ }
+ if (dev->ifindex != dev->iflink)
+ RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
+ if (dev->qdisc_sleeping)
+ RTA_PUT(skb, IFLA_QDISC,
+ strlen(dev->qdisc_sleeping->ops->id) + 1,
+ dev->qdisc_sleeping->ops->id);
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+ if (stats)
+ RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
+ }
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx;
+ int s_idx = cb->args[0];
+ struct device *dev;
+
+ for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+ break;
+ }
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx;
+ int s_idx = cb->family;
+
+ if (s_idx == 0)
+ s_idx = 1;
+ for (idx=1; idx<NPROTO; idx++) {
+ int type = cb->nlh->nlmsg_type-RTM_BASE;
+ if (idx < s_idx || idx == PF_PACKET)
+ continue;
+ if (rtnetlink_links[idx] == NULL ||
+ rtnetlink_links[idx][type].dumpit == NULL)
+ continue;
+ if (idx > s_idx)
+ memset(&cb->args[0], 0, sizeof(cb->args));
+ if (rtnetlink_links[idx][type].dumpit(skb, cb) == 0)
+ continue;
+ if (skb_tailroom(skb) < 256)
+ break;
+ }
+ cb->family = idx;
+
+ return skb->len;
+}
+
+void rtmsg_ifinfo(int type, struct device *dev)
+{
+ struct sk_buff *skb;
+ int size = NLMSG_GOODSIZE;
+
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) {
+ kfree_skb(skb);
+ return;
+ }
+ NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+}
+
+static int rtnetlink_done(struct netlink_callback *cb)
+{
+ if (cap_raised(NETLINK_CB(cb->skb).eff_cap, CAP_NET_ADMIN) && cb->nlh->nlmsg_flags&NLM_F_ATOMIC)
+ rtnl_shunlock();
+ return 0;
+}
+
+/* Process one rtnetlink message. */
+
+extern __inline__ int
+rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+ struct rtnetlink_link *link;
+ struct rtnetlink_link *link_tab;
+ struct rtattr *rta[RTATTR_MAX];
+
+ int exclusive = 0;
+ int sz_idx, kind;
+ int min_len;
+ int family;
+ int type;
+ int err;
+
+	/* Only requests are handled by the kernel now */
+ if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+ return 0;
+
+ type = nlh->nlmsg_type;
+
+	/* Control messages: ignore them */
+ if (type < RTM_BASE)
+ return 0;
+
+ /* Unknown message: reply with EINVAL */
+ if (type > RTM_MAX)
+ goto err_inval;
+
+ type -= RTM_BASE;
+
+ /* All the messages must have at least 1 byte length */
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+ return 0;
+
+ family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+	if (family >= NPROTO) {	/* family indexes rtnetlink_links[NPROTO] */
+ *errp = -EAFNOSUPPORT;
+ return -1;
+ }
+
+ link_tab = rtnetlink_links[family];
+ if (link_tab == NULL)
+ link_tab = rtnetlink_links[PF_UNSPEC];
+ link = &link_tab[type];
+
+ sz_idx = type>>2;
+ kind = type&3;
+
+ if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
+ *errp = -EPERM;
+ return -1;
+ }
+
+ if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ u32 rlen;
+
+ if (link->dumpit == NULL)
+ link = &(rtnetlink_links[PF_UNSPEC][type]);
+
+ if (link->dumpit == NULL)
+ goto err_inval;
+
+ /* Super-user locks all the tables to get atomic snapshot */
+ if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)
+ && nlh->nlmsg_flags&NLM_F_ATOMIC)
+ atomic_inc(&rtnl_rlockct);
+ if ((*errp = netlink_dump_start(rtnl, skb, nlh,
+ link->dumpit,
+ rtnetlink_done)) != 0) {
+ if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN) && nlh->nlmsg_flags&NLM_F_ATOMIC)
+ atomic_dec(&rtnl_rlockct);
+ return -1;
+ }
+ rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (rlen > skb->len)
+ rlen = skb->len;
+ skb_pull(skb, rlen);
+ return -1;
+ }
+
+ if (kind != 2) {
+ if (rtnl_exlock_nowait()) {
+ *errp = 0;
+ return -1;
+ }
+ exclusive = 1;
+ }
+
+ memset(&rta, 0, sizeof(rta));
+
+ min_len = rtm_min[sz_idx];
+ if (nlh->nlmsg_len < min_len)
+ goto err_inval;
+
+ if (nlh->nlmsg_len > min_len) {
+ int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+ struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+ while (RTA_OK(attr, attrlen)) {
+ unsigned flavor = attr->rta_type;
+ if (flavor) {
+ if (flavor > rta_max[sz_idx])
+ goto err_inval;
+ rta[flavor-1] = attr;
+ }
+ attr = RTA_NEXT(attr, attrlen);
+ }
+ }
+
+ if (link->doit == NULL)
+ link = &(rtnetlink_links[PF_UNSPEC][type]);
+ if (link->doit == NULL)
+ goto err_inval;
+ err = link->doit(skb, nlh, (void *)&rta);
+
+ if (exclusive)
+ rtnl_exunlock();
+ *errp = err;
+ return err;
+
+err_inval:
+ if (exclusive)
+ rtnl_exunlock();
+ *errp = -EINVAL;
+ return -1;
+}
+
+/*
+ * Process one packet of messages.
+ * Malformed skbs with wrong message lengths are silently discarded.
+ */
+
+extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb)
+{
+ int err;
+ struct nlmsghdr * nlh;
+
+ while (skb->len >= NLMSG_SPACE(0)) {
+ u32 rlen;
+
+ nlh = (struct nlmsghdr *)skb->data;
+ if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ return 0;
+ rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (rlen > skb->len)
+ rlen = skb->len;
+ if (rtnetlink_rcv_msg(skb, nlh, &err)) {
+			/* Not an error, but we must interrupt processing here.
+			 * Note that in this case we do not pull the message
+			 * from the skb; it will be processed later.
+			 */
+ if (err == 0)
+ return -1;
+ netlink_ack(skb, nlh, err);
+ } else if (nlh->nlmsg_flags&NLM_F_ACK)
+ netlink_ack(skb, nlh, 0);
+ skb_pull(skb, rlen);
+ }
+
+ return 0;
+}
+
+/*
+ * rtnetlink input queue processing routine:
+ *	- try to acquire the shared lock. If that fails, defer processing.
+ *	- feed skbs to rtnetlink_rcv_skb until it refuses a message,
+ *	  which happens when a dump has started and/or acquisition of
+ *	  the exclusive lock has failed.
+ */
+
+static void rtnetlink_rcv(struct sock *sk, int len)
+{
+ struct sk_buff *skb;
+
+ if (rtnl_shlock_nowait())
+ return;
+
+ while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+ if (rtnetlink_rcv_skb(skb)) {
+ if (skb->len)
+ skb_queue_head(&sk->receive_queue, skb);
+ else
+ kfree_skb(skb);
+ break;
+ }
+ kfree_skb(skb);
+ }
+
+ rtnl_shunlock();
+}
+
+static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_ifinfo, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { neigh_add, NULL, },
+ { neigh_delete, NULL, },
+ { NULL, neigh_dump_info, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+};
+
+
+static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct device *dev = ptr;
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ rtmsg_ifinfo(RTM_DELLINK, dev);
+ break;
+ default:
+ rtmsg_ifinfo(RTM_NEWLINK, dev);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+struct notifier_block rtnetlink_dev_notifier = {
+ rtnetlink_event,
+ NULL,
+ 0
+};
+
+
+__initfunc(void rtnetlink_init(void))
+{
+#ifdef RTNL_DEBUG
+ printk("Initializing RT netlink socket\n");
+#endif
+ rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+ if (rtnl == NULL)
+ panic("rtnetlink_init: cannot initialize rtnetlink\n");
+ register_netdevice_notifier(&rtnetlink_dev_notifier);
+ rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
+ rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+}
+
+
+
+#endif
diff --git a/pfinet/linux-src/net/core/scm.c b/pfinet/linux-src/net/core/scm.c
new file mode 100644
index 00000000..cdb5f3d0
--- /dev/null
+++ b/pfinet/linux-src/net/core/scm.c
@@ -0,0 +1,280 @@
+/* scm.c - Socket level control messages processing.
+ *
+ * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Alignment and value checking mods by Craig Metz
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/inet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/rarp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/scm.h>
+
+
+/*
+ *	Only allow a user to send credentials that they could have set
+ *	with setu(g)id.
+ */
+
+static __inline__ int scm_check_creds(struct ucred *creds)
+{
+ if ((creds->pid == current->pid || capable(CAP_SYS_ADMIN)) &&
+ ((creds->uid == current->uid || creds->uid == current->euid ||
+ creds->uid == current->suid) || capable(CAP_SETUID)) &&
+ ((creds->gid == current->gid || creds->gid == current->egid ||
+ creds->gid == current->sgid) || capable(CAP_SETGID))) {
+ return 0;
+ }
+ return -EPERM;
+}
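+
+/* For orientation (user-space sketch, assumed typical): the credentials
+ * checked above arrive as a SOL_SOCKET/SCM_CREDENTIALS control message:
+ *
+ *	struct ucred cr = { getpid(), getuid(), getgid() };
+ *
+ *	cmsg->cmsg_level = SOL_SOCKET;
+ *	cmsg->cmsg_type  = SCM_CREDENTIALS;
+ *	cmsg->cmsg_len   = CMSG_LEN(sizeof(cr));
+ *	memcpy(CMSG_DATA(cmsg), &cr, sizeof(cr));
+ */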
+
+static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+{
+ int *fdp = (int*)CMSG_DATA(cmsg);
+ struct scm_fp_list *fpl = *fplp;
+ struct file **fpp;
+ int i, num;
+
+ num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+
+ if (num <= 0)
+ return 0;
+
+ if (num > SCM_MAX_FD)
+ return -EINVAL;
+
+ if (!fpl)
+ {
+ fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ if (!fpl)
+ return -ENOMEM;
+ *fplp = fpl;
+ fpl->count = 0;
+ }
+ fpp = &fpl->fp[fpl->count];
+
+ if (fpl->count + num > SCM_MAX_FD)
+ return -EINVAL;
+
+ /*
+ * Verify the descriptors and increment the usage count.
+ */
+
+ for (i=0; i< num; i++)
+ {
+ int fd = fdp[i];
+ struct file *file;
+
+ if (fd < 0 || !(file = fget(fd)))
+ return -EBADF;
+ *fpp++ = file;
+ fpl->count++;
+ }
+ return num;
+}
+
+void __scm_destroy(struct scm_cookie *scm)
+{
+ struct scm_fp_list *fpl = scm->fp;
+ int i;
+
+ if (fpl) {
+ scm->fp = NULL;
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
+ kfree(fpl);
+ }
+}
+
+int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
+{
+ struct cmsghdr *cmsg;
+ int err;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
+ {
+ err = -EINVAL;
+
+ /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
+		/* The first check was omitted in <= 2.2.5. The reasoning was
+		   that the parser checks cmsg_len in any case, so the
+		   additional check would duplicate work.
+		   But if cmsg_level is not SOL_SOCKET, we did not check
+		   for too-short ancillary data objects at all! Oops.
+		   OK, let's add it...
+ */
+ if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
+ (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+ + cmsg->cmsg_len) > msg->msg_controllen)
+ goto error;
+
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ switch (cmsg->cmsg_type)
+ {
+ case SCM_RIGHTS:
+ err=scm_fp_copy(cmsg, &p->fp);
+ if (err<0)
+ goto error;
+ break;
+ case SCM_CREDENTIALS:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
+ goto error;
+ memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+ err = scm_check_creds(&p->creds);
+ if (err)
+ goto error;
+ break;
+ default:
+ goto error;
+ }
+ }
+
+ if (p->fp && !p->fp->count)
+ {
+ kfree(p->fp);
+ p->fp = NULL;
+ }
+
+ err = -EINVAL;
+ if (msg->msg_flags & MSG_CTLFLAGS)
+ goto error;
+
+ return 0;
+
+error:
+ scm_destroy(p);
+ return err;
+}
+
+int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+{
+ struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;
+ struct cmsghdr cmhdr;
+ int cmlen = CMSG_LEN(len);
+ int err;
+
+ if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+ msg->msg_flags |= MSG_CTRUNC;
+ return 0; /* XXX: return error? check spec. */
+ }
+ if (msg->msg_controllen < cmlen) {
+ msg->msg_flags |= MSG_CTRUNC;
+ cmlen = msg->msg_controllen;
+ }
+ cmhdr.cmsg_level = level;
+ cmhdr.cmsg_type = type;
+ cmhdr.cmsg_len = cmlen;
+
+ err = -EFAULT;
+ if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ goto out;
+ if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+ goto out;
+ cmlen = CMSG_SPACE(len);
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
+ err = 0;
+out:
+ return err;
+}
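+
+/* Typical caller (sketch, after the IPv4 recvmsg path): ancillary items
+ * are emitted one at a time, and a short user buffer simply sets
+ * MSG_CTRUNC above:
+ *
+ *	int ttl = skb->nh.iph->ttl;
+ *	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
+ */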
+
+void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
+{
+ struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;
+
+ int fdmax = (msg->msg_controllen - sizeof(struct cmsghdr))/sizeof(int);
+ int fdnum = scm->fp->count;
+ struct file **fp = scm->fp->fp;
+ int *cmfptr;
+ int err = 0, i;
+
+ if (fdnum < fdmax)
+ fdmax = fdnum;
+
+ for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+ {
+ int new_fd;
+ err = get_unused_fd();
+ if (err < 0)
+ break;
+ new_fd = err;
+ err = put_user(new_fd, cmfptr);
+ if (err) {
+ put_unused_fd(new_fd);
+ break;
+ }
+ /* Bump the usage count and install the file. */
+ fp[i]->f_count++;
+ current->files->fd[new_fd] = fp[i];
+ }
+
+ if (i > 0)
+ {
+ int cmlen = CMSG_LEN(i*sizeof(int));
+ if (!err)
+ err = put_user(SOL_SOCKET, &cm->cmsg_level);
+ if (!err)
+ err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+ if (!err)
+ err = put_user(cmlen, &cm->cmsg_len);
+ if (!err) {
+ cmlen = CMSG_SPACE(i*sizeof(int));
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
+ }
+ }
+ if (i < fdnum)
+ msg->msg_flags |= MSG_CTRUNC;
+
+ /*
+ * All of the files that fit in the message have had their
+ * usage counts incremented, so we just free the list.
+ */
+ __scm_destroy(scm);
+}
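+
+/* Caller sketch: AF_UNIX recvmsg hands received descriptors back to the
+ * user with something like
+ *
+ *	if (scm->fp)
+ *		scm_detach_fds(msg, scm);
+ *
+ * with MSG_CTRUNC signalling descriptors that did not fit.
+ */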
+
+struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
+{
+ struct scm_fp_list *new_fpl;
+ int i;
+
+ if (!fpl)
+ return NULL;
+
+ new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+ if (new_fpl) {
+ memcpy(new_fpl, fpl, sizeof(*fpl));
+
+ for (i=fpl->count-1; i>=0; i--)
+ fpl->fp[i]->f_count++;
+ }
+ return new_fpl;
+}
diff --git a/pfinet/linux-src/net/core/skbuff.c b/pfinet/linux-src/net/core/skbuff.c
new file mode 100644
index 00000000..b7636437
--- /dev/null
+++ b/pfinet/linux-src/net/core/skbuff.c
@@ -0,0 +1,385 @@
+/*
+ * Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ * Version: $Id: skbuff.c,v 1.55 1999/02/23 08:12:27 davem Exp $
+ *
+ * Fixes:
+ * Alan Cox : Fixed the worst of the load balancer bugs.
+ * Dave Platt : Interrupt stacking fix.
+ * Richard Kooijman : Timestamp fixes.
+ * Alan Cox : Changed buffer format.
+ * Alan Cox : destructor hook for AF_UNIX etc.
+ * Linus Torvalds : Better skb_clone.
+ * Alan Cox : Added skb_copy.
+ * Alan Cox : Added all the changed routines Linus
+ * only put in the headers
+ * Ray VanTassle : Fixed --skb->lock in free
+ * Alan Cox : skb_copy copy arp field
+ * Andi Kleen : slabified it.
+ *
+ * NOTE:
+ * The __skb_ routines should be called with interrupts
+ * disabled, or you better be *real* sure that the operation is atomic
+ * with respect to whatever list is being frobbed (e.g. via lock_sock()
+ * or via disabling bottom half handlers, etc).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/malloc.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/sock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+/*
+ * Skb list spinlock
+ */
+spinlock_t skb_queue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Resource tracking variables
+ */
+
+static atomic_t net_skbcount = ATOMIC_INIT(0);
+static atomic_t net_allocs = ATOMIC_INIT(0);
+static atomic_t net_fails = ATOMIC_INIT(0);
+
+extern atomic_t ip_frag_mem;
+
+static kmem_cache_t *skbuff_head_cache;
+
+/*
+ * Keep out-of-line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always
+ * reliable.
+ */
+
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+ panic("skput:over: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+}
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+ panic("skput:under: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+}
+
+void show_net_buffers(void)
+{
+ printk("Networking buffers in use : %u\n",
+ atomic_read(&net_skbcount));
+ printk("Total network buffer allocations : %u\n",
+ atomic_read(&net_allocs));
+ printk("Total failed network buffer allocs : %u\n",
+ atomic_read(&net_fails));
+#ifdef CONFIG_INET
+ printk("IP fragment buffer size : %u\n",
+ atomic_read(&ip_frag_mem));
+#endif
+}
+
+/* Allocate a new skbuff. We do this ourselves so we can fill in a few
+ * 'private' fields and also do memory statistics to find all the
+ * [BEEP] leaks.
+ *
+ */
+
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+ static int count = 0;
+ if (++count < 5) {
+ printk(KERN_ERR "alloc_skb called nonatomically "
+ "from interrupt %p\n", __builtin_return_address(0));
+ }
+ gfp_mask &= ~__GFP_WAIT;
+ }
+
+ /* Get the HEAD */
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (skb == NULL)
+ goto nohead;
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = ((size + 15) & ~15);
+ data = kmalloc(size + sizeof(atomic_t), gfp_mask);
+ if (data == NULL)
+ goto nodata;
+
+ /* Note that this counter is useless now - you can just look in the
+ * skbuff_head entry in /proc/slabinfo. We keep it only for emergency
+ * cases.
+ */
+ atomic_inc(&net_allocs);
+
+ skb->truesize = size;
+
+ atomic_inc(&net_skbcount);
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->is_clone = 0;
+ skb->cloned = 0;
+
+ atomic_set(&skb->users, 1);
+ atomic_set(skb_datarefp(skb), 1);
+ return skb;
+
+nodata:
+ kmem_cache_free(skbuff_head_cache, skb);
+nohead:
+ atomic_inc(&net_fails);
+ return NULL;
+}
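+
+/* The canonical allocation pattern (sketch; hh_len stands for the device
+ * header length): reserve headroom first, then append data, so the
+ * head/data/tail/end pointers set up above keep their invariants:
+ *
+ *	skb = alloc_skb(len + hh_len, GFP_ATOMIC);
+ *	if (skb) {
+ *		skb_reserve(skb, hh_len);
+ *		memcpy(skb_put(skb, len), buf, len);
+ *	}
+ */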
+
+
+/*
+ * Slab constructor for a skb head.
+ */
+static inline void skb_headerinit(void *p, kmem_cache_t *cache,
+ unsigned long flags)
+{
+ struct sk_buff *skb = p;
+
+ skb->destructor = NULL;
+ skb->pkt_type = PACKET_HOST; /* Default type */
+ skb->pkt_bridged = 0; /* Not bridged */
+ skb->prev = skb->next = NULL;
+ skb->list = NULL;
+ skb->sk = NULL;
+ skb->stamp.tv_sec=0; /* No idea about time */
+ skb->ip_summed = 0;
+ skb->security = 0; /* By default packets are insecure */
+ skb->dst = NULL;
+#ifdef CONFIG_IP_FIREWALL
+ skb->fwmark = 0;
+#endif
+ memset(skb->cb, 0, sizeof(skb->cb));
+ skb->priority = 0;
+}
+
+/*
+ * Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+ if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb)))
+ kfree(skb->head);
+
+ kmem_cache_free(skbuff_head_cache, skb);
+ atomic_dec(&net_skbcount);
+}
+
+/*
+ * Free an sk_buff. Release anything attached to the buffer. Clean the state.
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+ if (skb->list)
+ printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+ "on a list (from %p).\n", __builtin_return_address(0));
+
+ dst_release(skb->dst);
+ if(skb->destructor)
+ skb->destructor(skb);
+ skb_headerinit(skb, NULL, 0); /* clean state */
+ kfree_skbmem(skb);
+}
+
+/*
+ * Duplicate an sk_buff. The new one is not owned by a socket.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+ struct sk_buff *n;
+
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+
+ memcpy(n, skb, sizeof(*n));
+ atomic_inc(skb_datarefp(skb));
+ skb->cloned = 1;
+
+ atomic_inc(&net_allocs);
+ atomic_inc(&net_skbcount);
+ dst_clone(n->dst);
+ n->cloned = 1;
+ n->next = n->prev = NULL;
+ n->list = NULL;
+ n->sk = NULL;
+ n->is_clone = 1;
+ atomic_set(&n->users, 1);
+ n->destructor = NULL;
+ return n;
+}
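+
+/* Note (sketch): a clone shares the data area with the original, so a
+ * writer must take a private copy first, e.g.
+ *
+ *	if (skb->cloned)
+ *		skb = skb_copy(skb, GFP_ATOMIC);
+ */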
+
+/*
+ * This is slower, and copies the whole data area
+ */
+
+struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask)
+{
+ struct sk_buff *n;
+ unsigned long offset;
+
+ /*
+ * Allocate the copy buffer
+ */
+
+ n=alloc_skb(skb->end - skb->head, gfp_mask);
+ if(n==NULL)
+ return NULL;
+
+ /*
+ * Shift between the two data areas in bytes
+ */
+
+ offset=n->head-skb->head;
+
+ /* Set the data pointer */
+ skb_reserve(n,skb->data-skb->head);
+ /* Set the tail pointer and length */
+ skb_put(n,skb->len);
+ /* Copy the bytes */
+ memcpy(n->head,skb->head,skb->end-skb->head);
+ n->csum = skb->csum;
+ n->list=NULL;
+ n->sk=NULL;
+ n->dev=skb->dev;
+ n->priority=skb->priority;
+ n->protocol=skb->protocol;
+ n->dst=dst_clone(skb->dst);
+ n->h.raw=skb->h.raw+offset;
+ n->nh.raw=skb->nh.raw+offset;
+ n->mac.raw=skb->mac.raw+offset;
+ memcpy(n->cb, skb->cb, sizeof(skb->cb));
+ n->used=skb->used;
+ n->is_clone=0;
+ atomic_set(&n->users, 1);
+ n->pkt_type=skb->pkt_type;
+ n->stamp=skb->stamp;
+ n->destructor = NULL;
+ n->security=skb->security;
+#ifdef CONFIG_IP_FIREWALL
+ n->fwmark = skb->fwmark;
+#endif
+ return n;
+}
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
+{
+ struct sk_buff *n;
+ unsigned long offset;
+ int headroom = skb_headroom(skb);
+
+ /*
+ * Allocate the copy buffer
+ */
+
+ n=alloc_skb(skb->truesize+newheadroom-headroom, GFP_ATOMIC);
+ if(n==NULL)
+ return NULL;
+
+ skb_reserve(n,newheadroom);
+
+ /*
+ * Shift between the two data areas in bytes
+ */
+
+ offset=n->data-skb->data;
+
+ /* Set the tail pointer and length */
+ skb_put(n,skb->len);
+ /* Copy the bytes */
+ memcpy(n->data,skb->data,skb->len);
+ n->list=NULL;
+ n->sk=NULL;
+ n->priority=skb->priority;
+ n->protocol=skb->protocol;
+ n->dev=skb->dev;
+ n->dst=dst_clone(skb->dst);
+ n->h.raw=skb->h.raw+offset;
+ n->nh.raw=skb->nh.raw+offset;
+ n->mac.raw=skb->mac.raw+offset;
+ memcpy(n->cb, skb->cb, sizeof(skb->cb));
+ n->used=skb->used;
+ n->is_clone=0;
+ atomic_set(&n->users, 1);
+ n->pkt_type=skb->pkt_type;
+ n->stamp=skb->stamp;
+ n->destructor = NULL;
+ n->security=skb->security;
+#ifdef CONFIG_IP_FIREWALL
+ n->fwmark = skb->fwmark;
+#endif
+
+ return n;
+}
+
+#if 0
+/*
+ * Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+ /* Must match allocation in alloc_skb */
+ mtu = ((mtu + 15) & ~15) + sizeof(atomic_t);
+
+ kmem_add_cache_size(mtu);
+}
+#endif
+
+void __init skb_init(void)
+{
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ skb_headerinit, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
+}
diff --git a/pfinet/linux-src/net/core/sock.c b/pfinet/linux-src/net/core/sock.c
new file mode 100644
index 00000000..e0eb41a0
--- /dev/null
+++ b/pfinet/linux-src/net/core/sock.c
@@ -0,0 +1,1051 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Generic socket support routines. Memory allocators, socket lock/release
+ * handler for protocols to use and generic option handler.
+ *
+ *
+ * Version: $Id: sock.c,v 1.80 1999/05/08 03:04:34 davem Exp $
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Florian La Roche, <flla@stud.uni-sb.de>
+ * Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Fixes:
+ * Alan Cox : Numerous verify_area() problems
+ * Alan Cox : Connecting on a connecting socket
+ * now returns an error for tcp.
+ * Alan Cox : sock->protocol is set correctly.
+ * and is not sometimes left as 0.
+ * Alan Cox : connect handles icmp errors on a
+ * connect properly. Unfortunately there
+ * is a restart syscall nasty there. I
+ * can't match BSD without hacking the C
+ * library. Ideas urgently sought!
+ * Alan Cox : Disallow bind() to addresses that are
+ * not ours - especially broadcast ones!!
+ * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
+ * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
+ * instead they leave that for the DESTROY timer.
+ * Alan Cox : Clean up error flag in accept
+ * Alan Cox : TCP ack handling is buggy, the DESTROY timer
+ * was buggy. Put a remove_sock() in the handler
+ * for memory when we hit 0. Also altered the timer
+ * code. The ACK stuff can wait and needs major
+ * TCP layer surgery.
+ * Alan Cox : Fixed TCP ack bug, removed remove sock
+ * and fixed timer/inet_bh race.
+ * Alan Cox : Added zapped flag for TCP
+ * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
+ * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
+ * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
+ * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
+ * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
+ * Rick Sladkey : Relaxed UDP rules for matching packets.
+ * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
+ * Pauline Middelink : identd support
+ * Alan Cox : Fixed connect() taking signals I think.
+ * Alan Cox : SO_LINGER supported
+ * Alan Cox : Error reporting fixes
+ * Anonymous : inet_create tidied up (sk->reuse setting)
+ * Alan Cox : inet sockets don't set sk->type!
+ * Alan Cox : Split socket option code
+ * Alan Cox : Callbacks
+ * Alan Cox : Nagle flag for Charles & Johannes stuff
+ * Alex : Removed restriction on inet fioctl
+ * Alan Cox : Splitting INET from NET core
+ * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
+ * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
+ * Alan Cox : Split IP from generic code
+ * Alan Cox : New kfree_skbmem()
+ * Alan Cox : Make SO_DEBUG superuser only.
+ * Alan Cox : Allow anyone to clear SO_DEBUG
+ * (compatibility fix)
+ * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
+ * Alan Cox : Allocator for a socket is settable.
+ * Alan Cox : SO_ERROR includes soft errors.
+ * Alan Cox : Allow NULL arguments on some SO_ opts
+ * Alan Cox : Generic socket allocation to make hooks
+ * easier (suggested by Craig Metz).
+ * Michael Pall : SO_ERROR returns positive errno again
+ * Steve Whitehouse: Added default destructor to free
+ * protocol private data.
+ * Steve Whitehouse: Added various other default routines
+ * common to several socket families.
+ * Chris Evans : Call suser() check last on F_SETOWN
+ * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
+ * Andi Kleen : Fix write_space callback
+ *
+ * To Fix:
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/rarp.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <net/icmp.h>
+#include <linux/ipsec.h>
+
+#ifdef CONFIG_FILTER
+#include <linux/filter.h>
+#endif
+
+#define min(a,b) ((a)<(b)?(a):(b))
+
+/* Run time adjustable parameters. */
+__u32 sysctl_wmem_max = SK_WMEM_MAX;
+__u32 sysctl_rmem_max = SK_RMEM_MAX;
+__u32 sysctl_wmem_default = SK_WMEM_MAX;
+__u32 sysctl_rmem_default = SK_RMEM_MAX;
+
+/* Maximal space eaten by iovec or ancillary data plus some space */
+int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+
+/*
+ * This is meant for all protocols to use and covers goings on
+ * at the socket level. Everything here is generic.
+ */
+
+int sock_setsockopt(struct socket *sock, int level, int optname,
+ char *optval, int optlen)
+{
+ struct sock *sk=sock->sk;
+#ifdef CONFIG_FILTER
+ struct sk_filter *filter;
+#endif
+ int val;
+ int valbool;
+ int err;
+ struct linger ling;
+ int ret = 0;
+
+ /*
+ * Options without arguments
+ */
+
+#ifdef SO_DONTLINGER /* Compatibility item... */
+ switch(optname)
+ {
+ case SO_DONTLINGER:
+ sk->linger=0;
+ return 0;
+ }
+#endif
+
+ if(optlen<sizeof(int))
+ return(-EINVAL);
+
+ err = get_user(val, (int *)optval);
+ if (err)
+ return err;
+
+ valbool = val?1:0;
+
+ switch(optname)
+ {
+ case SO_DEBUG:
+ if(val && !capable(CAP_NET_ADMIN))
+ {
+ ret = -EACCES;
+ }
+ else
+ sk->debug=valbool;
+ break;
+ case SO_REUSEADDR:
+ sk->reuse = valbool;
+ break;
+ case SO_TYPE:
+ case SO_ERROR:
+ ret = -ENOPROTOOPT;
+ break;
+ case SO_DONTROUTE:
+ sk->localroute=valbool;
+ break;
+ case SO_BROADCAST:
+ sk->broadcast=valbool;
+ break;
+ case SO_SNDBUF:
+			/* Don't return an error on this; BSD doesn't, and if
+			   you think about it this is right. Otherwise apps
+			   have to play 'guess the biggest size' games.
+			   RCVBUF/SNDBUF are treated in BSD as hints */
+
+ if (val > sysctl_wmem_max)
+ val = sysctl_wmem_max;
+
+ sk->sndbuf = max(val*2,2048);
+
+ /*
+ * Wake up sending tasks if we
+ * upped the value.
+ */
+ sk->write_space(sk);
+ break;
+
+ case SO_RCVBUF:
+			/* Don't return an error on this; BSD doesn't, and if
+			   you think about it this is right. Otherwise apps
+			   have to play 'guess the biggest size' games.
+			   RCVBUF/SNDBUF are treated in BSD as hints */
+
+ if (val > sysctl_rmem_max)
+ val = sysctl_rmem_max;
+
+ /* FIXME: is this lower bound the right one? */
+ sk->rcvbuf = max(val*2,256);
+ break;
+
+ case SO_KEEPALIVE:
+#ifdef CONFIG_INET
+ if (sk->protocol == IPPROTO_TCP)
+ {
+ tcp_set_keepalive(sk, valbool);
+ }
+#endif
+ sk->keepopen = valbool;
+ break;
+
+ case SO_OOBINLINE:
+ sk->urginline = valbool;
+ break;
+
+ case SO_NO_CHECK:
+ sk->no_check = valbool;
+ break;
+
+ case SO_PRIORITY:
+ if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ sk->priority = val;
+ else
+ return(-EPERM);
+ break;
+
+ case SO_LINGER:
+ if(optlen<sizeof(ling))
+ return -EINVAL; /* 1003.1g */
+ err = copy_from_user(&ling,optval,sizeof(ling));
+ if (err)
+ {
+ ret = -EFAULT;
+ break;
+ }
+ if(ling.l_onoff==0)
+ sk->linger=0;
+ else
+ {
+ sk->lingertime=ling.l_linger;
+ sk->linger=1;
+ }
+ break;
+
+ case SO_BSDCOMPAT:
+ sk->bsdism = valbool;
+ break;
+
+ case SO_PASSCRED:
+ sock->passcred = valbool;
+ break;
+
+
+#ifdef CONFIG_NETDEVICES
+ case SO_BINDTODEVICE:
+ {
+ char devname[IFNAMSIZ];
+
+ /* Sorry... */
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ /* Bind this socket to a particular device like "eth0",
+ * as specified in the passed interface name. If the
+ * name is "" or the option length is zero the socket
+ * is not bound.
+ */
+
+ if (!valbool) {
+ sk->bound_dev_if = 0;
+ } else {
+ if (optlen > IFNAMSIZ)
+ optlen = IFNAMSIZ;
+ if (copy_from_user(devname, optval, optlen))
+ return -EFAULT;
+
+ /* Remove any cached route for this socket. */
+ lock_sock(sk);
+ dst_release(xchg(&sk->dst_cache, NULL));
+ release_sock(sk);
+
+ if (devname[0] == '\0') {
+ sk->bound_dev_if = 0;
+ } else {
+ struct device *dev = dev_get(devname);
+ if (!dev)
+ return -EINVAL;
+ sk->bound_dev_if = dev->ifindex;
+ }
+ return 0;
+ }
+ }
+#endif
+
+
+#ifdef CONFIG_FILTER
+ case SO_ATTACH_FILTER:
+ ret = -EINVAL;
+ if (optlen == sizeof(struct sock_fprog)) {
+ struct sock_fprog fprog;
+
+ ret = -EFAULT;
+ if (copy_from_user(&fprog, optval, sizeof(fprog)))
+ break;
+
+ ret = sk_attach_filter(&fprog, sk);
+ }
+ break;
+
+ case SO_DETACH_FILTER:
+ filter = sk->filter;
+ if(filter) {
+ sk->filter = NULL;
+ synchronize_bh();
+ sk_filter_release(sk, filter);
+ return 0;
+ }
+ return -ENOENT;
+#endif
+ /* We implement the SO_SNDLOWAT etc to
+ not be settable (1003.1g 5.3) */
+ default:
+ return(-ENOPROTOOPT);
+ }
+ return ret;
+}
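+
+/* User-space sketch of one of the paths above (SO_BINDTODEVICE; the
+ * interface name is illustrative):
+ *
+ *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 5);
+ *
+ * which routes all traffic on the socket via eth0 (CAP_NET_RAW required).
+ */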
+
+
+int sock_getsockopt(struct socket *sock, int level, int optname,
+ char *optval, int *optlen)
+{
+ struct sock *sk = sock->sk;
+
+ union
+ {
+ int val;
+ struct linger ling;
+ struct timeval tm;
+ } v;
+
+ int lv=sizeof(int),len;
+
+ if(get_user(len,optlen))
+ return -EFAULT;
+
+ switch(optname)
+ {
+ case SO_DEBUG:
+ v.val = sk->debug;
+ break;
+
+ case SO_DONTROUTE:
+ v.val = sk->localroute;
+ break;
+
+ case SO_BROADCAST:
+ v.val= sk->broadcast;
+ break;
+
+ case SO_SNDBUF:
+ v.val=sk->sndbuf;
+ break;
+
+ case SO_RCVBUF:
+ v.val =sk->rcvbuf;
+ break;
+
+ case SO_REUSEADDR:
+ v.val = sk->reuse;
+ break;
+
+ case SO_KEEPALIVE:
+ v.val = sk->keepopen;
+ break;
+
+ case SO_TYPE:
+ v.val = sk->type;
+ break;
+
+ case SO_ERROR:
+ v.val = -sock_error(sk);
+ if(v.val==0)
+ v.val=xchg(&sk->err_soft,0);
+ break;
+
+ case SO_OOBINLINE:
+ v.val = sk->urginline;
+ break;
+
+ case SO_NO_CHECK:
+ v.val = sk->no_check;
+ break;
+
+ case SO_PRIORITY:
+ v.val = sk->priority;
+ break;
+
+ case SO_LINGER:
+ lv=sizeof(v.ling);
+ v.ling.l_onoff=sk->linger;
+ v.ling.l_linger=sk->lingertime;
+ break;
+
+ case SO_BSDCOMPAT:
+ v.val = sk->bsdism;
+ break;
+
+ case SO_RCVTIMEO:
+ case SO_SNDTIMEO:
+ lv=sizeof(struct timeval);
+ v.tm.tv_sec=0;
+ v.tm.tv_usec=0;
+ break;
+
+ case SO_RCVLOWAT:
+ case SO_SNDLOWAT:
+ v.val=1;
+ break;
+
+ case SO_PASSCRED:
+ v.val = sock->passcred;
+ break;
+
+ case SO_PEERCRED:
+ lv=sizeof(sk->peercred);
+ len=min(len, lv);
+ if(copy_to_user((void*)optval, &sk->peercred, len))
+ return -EFAULT;
+ goto lenout;
+
+ default:
+ return(-ENOPROTOOPT);
+ }
+ len=min(len,lv);
+ if(copy_to_user(optval,&v,len))
+ return -EFAULT;
+lenout:
+ if(put_user(len, optlen))
+ return -EFAULT;
+ return 0;
+}
+
+static kmem_cache_t *sk_cachep;
+
+/*
+ * All socket objects are allocated here. This is for future
+ * usage.
+ */
+
+struct sock *sk_alloc(int family, int priority, int zero_it)
+{
+ struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
+
+ if(sk) {
+ if (zero_it)
+ memset(sk, 0, sizeof(struct sock));
+ sk->family = family;
+ }
+
+ return sk;
+}
+
+void sk_free(struct sock *sk)
+{
+#ifdef CONFIG_FILTER
+ struct sk_filter *filter;
+#endif
+ if (sk->destruct)
+ sk->destruct(sk);
+
+#ifdef CONFIG_FILTER
+ filter = sk->filter;
+ if (filter) {
+ sk_filter_release(sk, filter);
+ sk->filter = NULL;
+ }
+#endif
+
+ if (atomic_read(&sk->omem_alloc))
+ printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
+
+ kmem_cache_free(sk_cachep, sk);
+}
+
+void __init sk_init(void)
+{
+ sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0,
+ SLAB_HWCACHE_ALIGN, 0, 0);
+
+}
+
+/*
+ * Simple resource managers for sockets.
+ */
+
+
+/*
+ * Write buffer destructor automatically called from kfree_skb.
+ */
+void sock_wfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ /* In case it might be waiting for more memory. */
+ atomic_sub(skb->truesize, &sk->wmem_alloc);
+ sk->write_space(sk);
+}
+
+/*
+ * Read buffer destructor automatically called from kfree_skb.
+ */
+void sock_rfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->rmem_alloc);
+}
+
+
+/*
+ * Allocate a skb from the socket's send buffer.
+ */
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+ if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
+ struct sk_buff * skb = alloc_skb(size, priority);
+ if (skb) {
+ atomic_add(skb->truesize, &sk->wmem_alloc);
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ return skb;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Allocate a skb from the socket's receive buffer.
+ */
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+ if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
+ struct sk_buff *skb = alloc_skb(size, priority);
+ if (skb) {
+ atomic_add(skb->truesize, &sk->rmem_alloc);
+ skb->destructor = sock_rfree;
+ skb->sk = sk;
+ return skb;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Allocate a memory block from the socket's option memory buffer.
+ */
+void *sock_kmalloc(struct sock *sk, int size, int priority)
+{
+ if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
+ void *mem;
+ /* First do the add, to avoid the race if kmalloc
+ * might sleep.
+ */
+ atomic_add(size, &sk->omem_alloc);
+ mem = kmalloc(size, priority);
+ if (mem)
+ return mem;
+ atomic_sub(size, &sk->omem_alloc);
+ }
+ return NULL;
+}
+
+/*
+ * Free an option memory block.
+ */
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+ kfree_s(mem, size);
+ atomic_sub(size, &sk->omem_alloc);
+}
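+
+/* Usage sketch: option data charged against the socket is allocated and
+ * freed in matched pairs so omem_alloc stays balanced:
+ *
+ *	opt = sock_kmalloc(sk, size, GFP_KERNEL);
+ *	if (opt == NULL)
+ *		return -ENOBUFS;
+ *	...
+ *	sock_kfree_s(sk, opt, size);
+ */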
+
+/* FIXME: this is insane. We are supposed to be controlling how much
+ * space we have for data bytes, not packet headers.
+ * This really points out that we need a better system for doing the
+ * receive buffer. -- erics
+ * WARNING: This is currently ONLY used in tcp. If you need it elsewhere
+ * this will probably not be what you want. Possibly these two routines
+ * should move over to the ipv4 directory.
+ */
+unsigned long sock_rspace(struct sock *sk)
+{
+ int amt = 0;
+
+ if (sk != NULL) {
+		/* This used to have some bizarre complications in an
+		 * attempt to reserve some amount of space. This doesn't
+ * make sense, since the number returned here does not
+ * actually reflect allocated space, but rather the amount
+ * of space we committed to. We gamble that we won't
+ * run out of memory, and returning a smaller number does
+ * not change the gamble. If we lose the gamble tcp still
+ * works, it may just slow down for retransmissions.
+ */
+ amt = sk->rcvbuf - atomic_read(&sk->rmem_alloc);
+ if (amt < 0)
+ amt = 0;
+ }
+ return amt;
+}
+
+
+/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
+ I think, these locks should be removed for datagram sockets.
+ */
+static void sock_wait_for_wmem(struct sock * sk)
+{
+ struct wait_queue wait = { current, NULL };
+
+ sk->socket->flags &= ~SO_NOSPACE;
+ add_wait_queue(sk->sleep, &wait);
+ for (;;) {
+ if (signal_pending(current))
+ break;
+ current->state = TASK_INTERRUPTIBLE;
+ if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
+ break;
+ if (sk->shutdown & SEND_SHUTDOWN)
+ break;
+ if (sk->err)
+ break;
+ schedule();
+ }
+ current->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
+}
+
+
+/*
+ * Generic send/receive buffer handlers
+ */
+
+struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
+ unsigned long fallback, int noblock, int *errcode)
+{
+ int err;
+ struct sk_buff *skb;
+
+ while (1) {
+ unsigned long try_size = size;
+
+ err = sock_error(sk);
+ if (err != 0)
+ goto failure;
+
+ /*
+ * We should send SIGPIPE in these cases according to
+ * 1003.1g draft 6.4. If we (the user) did a shutdown()
+ * call however we should not.
+ *
+		 * Note: This routine isn't just used for datagrams and
+ * anyway some datagram protocols have a notion of
+ * close down.
+ */
+
+ err = -EPIPE;
+ if (sk->shutdown&SEND_SHUTDOWN)
+ goto failure;
+
+ if (fallback) {
+			/* This buffer get won't block, nor use the atomic queue.
+			 * It still produces annoying 'no free page' messages.
+ */
+ skb = sock_wmalloc(sk, size, 0, GFP_BUFFER);
+ if (skb)
+ break;
+ try_size = fallback;
+ }
+ skb = sock_wmalloc(sk, try_size, 0, sk->allocation);
+ if (skb)
+ break;
+
+ /*
+ * This means we have too many buffers for this socket already.
+ */
+
+ sk->socket->flags |= SO_NOSPACE;
+ err = -EAGAIN;
+ if (noblock)
+ goto failure;
+ err = -ERESTARTSYS;
+ if (signal_pending(current))
+ goto failure;
+ sock_wait_for_wmem(sk);
+ }
+
+ return skb;
+
+failure:
+ *errcode = err;
+ return NULL;
+}
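+
+/* Caller sketch (as in the datagram send paths; hh_len is the device
+ * header length): the call blocks, subject to signals, until write
+ * memory is available unless MSG_DONTWAIT was passed:
+ *
+ *	skb = sock_alloc_send_skb(sk, len + hh_len + 15, 0,
+ *				  msg->msg_flags & MSG_DONTWAIT, &err);
+ *	if (skb == NULL)
+ *		return err;
+ */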
+
+
+void __release_sock(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ if (!sk->prot || !sk->backlog_rcv)
+ return;
+
+ /* See if we have any packets built up. */
+ start_bh_atomic();
+ while (!skb_queue_empty(&sk->back_log)) {
+ struct sk_buff * skb = sk->back_log.next;
+ __skb_unlink(skb, &sk->back_log);
+ sk->backlog_rcv(sk, skb);
+ }
+ end_bh_atomic();
+#endif
+}
+
+
+/*
+ *	Generic socket manager library. Most of the simpler socket families
+ * use this to manage their socket lists. At some point we should
+ * hash these. By making this generic we get the lot hashed for free.
+ */
+
+void sklist_remove_socket(struct sock **list, struct sock *sk)
+{
+ struct sock *s;
+
+ start_bh_atomic();
+
+ s= *list;
+ if(s==sk)
+ {
+ *list = s->next;
+ end_bh_atomic();
+ return;
+ }
+ while(s && s->next)
+ {
+ if(s->next==sk)
+ {
+ s->next=sk->next;
+ break;
+ }
+ s=s->next;
+ }
+ end_bh_atomic();
+}
+
+void sklist_insert_socket(struct sock **list, struct sock *sk)
+{
+ start_bh_atomic();
+ sk->next= *list;
+ *list=sk;
+ end_bh_atomic();
+}
+
+/*
+ * This is only called from user mode. Thus it protects itself against
+ * interrupt users but doesn't worry about being called during work.
+ * Once it is removed from the queue no interrupt or bottom half will
+ * touch it and we are (fairly 8-) ) safe.
+ */
+
+void sklist_destroy_socket(struct sock **list, struct sock *sk);
+
+/*
+ * Handler for deferred kills.
+ */
+
+static void sklist_destroy_timer(unsigned long data)
+{
+ struct sock *sk=(struct sock *)data;
+ sklist_destroy_socket(NULL,sk);
+}
+
+/*
+ * Destroy a socket. We pass NULL for a list if we know the
+ * socket is not on a list.
+ */
+
+void sklist_destroy_socket(struct sock **list,struct sock *sk)
+{
+ struct sk_buff *skb;
+ if(list)
+ sklist_remove_socket(list, sk);
+
+ while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
+ {
+ kfree_skb(skb);
+ }
+
+ if(atomic_read(&sk->wmem_alloc) == 0 &&
+ atomic_read(&sk->rmem_alloc) == 0 &&
+ sk->dead)
+ {
+ sk_free(sk);
+ }
+ else
+ {
+ /*
+ * Someone is using our buffers still.. defer
+ */
+ init_timer(&sk->timer);
+ sk->timer.expires=jiffies+SOCK_DESTROY_TIME;
+ sk->timer.function=sklist_destroy_timer;
+ sk->timer.data = (unsigned long)sk;
+ add_timer(&sk->timer);
+ }
+}
+
+/*
+ * A set of default routines for initialising struct proto_ops when
+ * a protocol does not support a particular operation. Where a
+ * "do nothing" stub would make no sense, some minimal default
+ * processing is provided instead.
+ */
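+
+/*
+ * Sketch (editorial; field order abridged and names hypothetical):
+ * a minimal datagram family fills in the operations it implements
+ * and falls back on these stubs for the rest, e.g.
+ *
+ *	struct proto_ops hypo_dgram_ops = {
+ *		PF_HYPO,
+ *		sock_no_dup, hypo_release, hypo_bind, hypo_connect,
+ *		sock_no_socketpair, sock_no_accept, hypo_getname,
+ *		datagram_poll, hypo_ioctl, sock_no_listen,
+ *		sock_no_shutdown, sock_no_setsockopt,
+ *		sock_no_getsockopt, sock_no_fcntl,
+ *		hypo_sendmsg, hypo_recvmsg
+ *	};
+ */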
+
+int sock_no_dup(struct socket *newsock, struct socket *oldsock)
+{
+ struct sock *sk = oldsock->sk;
+
+ return net_families[sk->family]->create(newsock, sk->protocol);
+}
+
+int sock_no_release(struct socket *sock, struct socket *peersock)
+{
+ return 0;
+}
+
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
+ int len, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
+ int *len, int peer)
+{
+ return -EOPNOTSUPP;
+}
+
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+{
+ return 0;
+}
+
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_listen(struct socket *sock, int backlog)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_shutdown(struct socket *sock, int how)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+ char *optval, int optlen)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+ char *optval, int *optlen)
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Note: if you add something that sleeps here then change sock_fcntl()
+ * to do proper fd locking.
+ */
+int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct sock *sk = sock->sk;
+
+ switch(cmd)
+ {
+ case F_SETOWN:
+ /*
+ * This is a little restrictive, but it's the only
+ * way to make sure that you can't send a SIGURG to
+ * another process.
+ */
+ if (current->pgrp != -arg &&
+ current->pid != arg &&
+ !capable(CAP_KILL)) return(-EPERM);
+ sk->proc = arg;
+ return(0);
+ case F_GETOWN:
+ return(sk->proc);
+ default:
+ return(-EINVAL);
+ }
+}
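+
+/*
+ * Editorial note: F_SETOWN records which process (or, when the
+ * argument is negated, which process group) receives SIGURG/SIGIO
+ * for this socket. From user space the usual pattern is:
+ *
+ *	fcntl(fd, F_SETOWN, getpid());
+ */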
+
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
+ struct scm_cookie *scm)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags,
+ struct scm_cookie *scm)
+{
+ return -EOPNOTSUPP;
+}
+
+
+
+/*
+ * Default Socket Callbacks
+ */
+
+void sock_def_wakeup(struct sock *sk)
+{
+ if(!sk->dead)
+ wake_up_interruptible(sk->sleep);
+}
+
+void sock_def_error_report(struct sock *sk)
+{
+ if (!sk->dead) {
+ wake_up_interruptible(sk->sleep);
+ sock_wake_async(sk->socket,0);
+ }
+}
+
+void sock_def_readable(struct sock *sk, int len)
+{
+ if(!sk->dead) {
+ wake_up_interruptible(sk->sleep);
+ sock_wake_async(sk->socket,1);
+ }
+}
+
+void sock_def_write_space(struct sock *sk)
+{
+ /* Do not wake up a writer until he can make "significant"
+ * progress. --DaveM
+ */
+ if(!sk->dead &&
+ ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) {
+ wake_up_interruptible(sk->sleep);
+
+ /* Should agree with poll, otherwise some programs break */
+ if (sock_writeable(sk))
+ sock_wake_async(sk->socket, 2);
+ }
+}
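+
+/*
+ * Worked example (editorial): (wmem_alloc << 1) <= sndbuf is the same
+ * as wmem_alloc <= sndbuf / 2, so with sndbuf = 65536 a blocked
+ * writer is woken only once outstanding write memory has dropped to
+ * 32768 bytes, i.e. at least half the send buffer is free again.
+ */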
+
+void sock_def_destruct(struct sock *sk)
+{
+ if (sk->protinfo.destruct_hook)
+ kfree(sk->protinfo.destruct_hook);
+}
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+ skb_queue_head_init(&sk->receive_queue);
+ skb_queue_head_init(&sk->write_queue);
+ skb_queue_head_init(&sk->back_log);
+ skb_queue_head_init(&sk->error_queue);
+
+ init_timer(&sk->timer);
+
+ sk->allocation = GFP_KERNEL;
+ sk->rcvbuf = sysctl_rmem_default;
+ sk->sndbuf = sysctl_wmem_default;
+ sk->state = TCP_CLOSE;
+ sk->zapped = 1;
+ sk->socket = sock;
+
+ if(sock)
+ {
+ sk->type = sock->type;
+ sk->sleep = &sock->wait;
+ sock->sk = sk;
+ }
+
+ sk->state_change = sock_def_wakeup;
+ sk->data_ready = sock_def_readable;
+ sk->write_space = sock_def_write_space;
+ sk->error_report = sock_def_error_report;
+ sk->destruct = sock_def_destruct;
+
+ sk->peercred.pid = 0;
+ sk->peercred.uid = -1;
+ sk->peercred.gid = -1;
+
+}
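+
+/*
+ * Usage sketch (editorial; names hypothetical): a family's create()
+ * typically allocates the sock, lets sock_init_data() install the
+ * defaults above, then overrides only what it needs:
+ *
+ *	sk = sk_alloc(PF_HYPO, GFP_KERNEL, 1);
+ *	if (sk == NULL)
+ *		return -ENOBUFS;
+ *	sock_init_data(sock, sk);
+ *	sk->destruct = hypo_destruct;	(replace a default callback)
+ *	sk->protocol = protocol;
+ */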
diff --git a/pfinet/linux-src/net/core/sysctl_net_core.c b/pfinet/linux-src/net/core/sysctl_net_core.c
new file mode 100644
index 00000000..446ca145
--- /dev/null
+++ b/pfinet/linux-src/net/core/sysctl_net_core.c
@@ -0,0 +1,61 @@
+/* -*- linux-c -*-
+ * sysctl_net_core.c: sysctl interface to net core subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/core directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+
+#ifdef CONFIG_SYSCTL
+
+extern int netdev_max_backlog;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
+extern int sysctl_core_destroy_delay;
+extern int sysctl_optmem_max;
+
+ctl_table core_table[] = {
+#ifdef CONFIG_NET
+ {NET_CORE_WMEM_MAX, "wmem_max",
+ &sysctl_wmem_max, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_CORE_RMEM_MAX, "rmem_max",
+ &sysctl_rmem_max, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_CORE_WMEM_DEFAULT, "wmem_default",
+ &sysctl_wmem_default, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_CORE_RMEM_DEFAULT, "rmem_default",
+ &sysctl_rmem_default, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
+ &netdev_max_backlog, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#ifdef CONFIG_NET_FASTROUTE
+ {NET_CORE_FASTROUTE, "netdev_fastroute",
+ &netdev_fastroute, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#endif
+ {NET_CORE_MSG_COST, "message_cost",
+ &net_msg_cost, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_CORE_MSG_BURST, "message_burst",
+ &net_msg_burst, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_CORE_OPTMEM_MAX, "optmem_max",
+ &sysctl_optmem_max, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#endif /* CONFIG_NET */
+ { 0 }
+};
+#endif
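+
+/*
+ * Editorial note: with CONFIG_SYSCTL these entries surface as files
+ * under /proc/sys/net/core, each read and written through
+ * proc_dointvec (or proc_dointvec_jiffies for the rate-limit pair),
+ * e.g. from a shell:
+ *
+ *	cat /proc/sys/net/core/wmem_max
+ *	echo 131072 > /proc/sys/net/core/wmem_max
+ */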
diff --git a/pfinet/linux-src/net/core/utils.c b/pfinet/linux-src/net/core/utils.c
new file mode 100644
index 00000000..415926b8
--- /dev/null
+++ b/pfinet/linux-src/net/core/utils.c
@@ -0,0 +1,66 @@
+/*
+ * Generic address resolution entity
+ *
+ * Authors:
+ * net_random Alan Cox
+ * net_ratelimit Andy Kleen
+ *
+ * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+static unsigned long net_rand_seed = 152L;
+
+unsigned long net_random(void)
+{
+ net_rand_seed = net_rand_seed * 69069L + 1;
+ return net_rand_seed ^ jiffies;
+}
+
+void net_srandom(unsigned long entropy)
+{
+ net_rand_seed ^= entropy;
+ net_random();
+}
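+
+/*
+ * Editorial note: net_random() steps the classic 69069 linear
+ * congruential generator and XORs in jiffies; it is cheap but in no
+ * way cryptographically strong, so it suits jitter, not secrets,
+ * e.g. (hypothetical use):
+ *
+ *	timeout = base_timeout + (net_random() % jitter);
+ */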
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10*5*HZ;
+
+/*
+ * This enforces a rate limit: not more than one kernel message
+ * every 5 seconds, to make a log-flooding denial-of-service
+ * attack impossible.
+ *
+ * All warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+ static unsigned long toks = 10*5*HZ;
+ static unsigned long last_msg;
+ static int missed;
+ unsigned long now = jiffies;
+
+ toks += now - xchg(&last_msg, now);
+ if (toks > net_msg_burst)
+ toks = net_msg_burst;
+ if (toks >= net_msg_cost) {
+ toks -= net_msg_cost;
+ if (missed)
+ printk(KERN_WARNING "NET: %d messages suppressed.\n", missed);
+ missed = 0;
+ return 1;
+ }
+ missed++;
+ return 0;
+}
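+
+/*
+ * Usage sketch (editorial): with the defaults above, a silent period
+ * banks at most net_msg_burst = 50*HZ jiffies of tokens, i.e. ten
+ * messages at 5*HZ each; after the burst, one message per 5 seconds.
+ * Guard noisy warnings so a packet flood cannot fill the log, e.g.
+ * (hypothetical message):
+ *
+ *	if (net_ratelimit())
+ *		printk(KERN_WARNING "hypo: dropped bad frame\n");
+ */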