From c7923f6aa252a29ccb4f16bd91469c9000a2bd94 Mon Sep 17 00:00:00 2001
From: "Michael I. Bushnell" <mib@gnu.org>
Date: Wed, 12 Jul 1995 15:42:49 +0000
Subject: Initial revision

---
 pfinet/linux-inet/af_inet.c | 1564 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1564 insertions(+)
 create mode 100644 pfinet/linux-inet/af_inet.c

(limited to 'pfinet/linux-inet/af_inet.c')

diff --git a/pfinet/linux-inet/af_inet.c b/pfinet/linux-inet/af_inet.c
new file mode 100644
index 00000000..a82b59c1
--- /dev/null
+++ b/pfinet/linux-inet/af_inet.c
@@ -0,0 +1,1564 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		AF_INET protocol family socket handler.
+ *
+ * Version:	@(#)af_inet.c	(from sock.c) 1.0.17	06/02/93
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Changes (see also sock.c)
+ *
+ *		A.N.Kuznetsov	:	Socket death error in accept().
+ *		John Richardson :	Fix non blocking error in connect()
+ *					so sockets that fail to connect
+ *					don't return -EINPROGRESS.
+ *		Alan Cox	:	Asynchronous I/O support
+ *		Alan Cox	:	Keep correct socket pointer on sock structures
+ *					when accept() ed
+ *		Alan Cox	:	Semantics of SO_LINGER aren't state moved
+ *					to close when you look carefully. With
+ *					this fixed and the accept bug fixed 
+ *					some RPC stuff seems happier.
+ *		Niibe Yutaka	:	4.4BSD style write async I/O
+ *		Alan Cox, 
+ *		Tony Gale 	:	Fixed reuse semantics.
+ *		Alan Cox	:	bind() shouldn't abort existing but dead
+ *					sockets. Stops FTP netin:.. I hope.
+ *		Alan Cox	:	bind() works correctly for RAW sockets. Note
+ *					that FreeBSD at least is broken in this respect
+ *					so be careful with compatibility tests...
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include "ip.h"
+#include "protocol.h"
+#include "arp.h"
+#include "rarp.h"
+#include "route.h"
+#include "tcp.h"
+#include "udp.h"
+#include <linux/skbuff.h>
+#include "sock.h"
+#include "raw.h"
+#include "icmp.h"
+
+#define min(a,b)	((a)<(b)?(a):(b))	/* NB: the selected argument is evaluated twice */
+
+extern struct proto packet_prot;
+
+
+/*
+ *	Report whether socket number 'num' is already taken for protocol
+ *	'prot'.  Only the hash chain 'num' maps to is searched.
+ */
+static int sk_inuse(struct proto *prot, int num)
+{
+	struct sock *cur = prot->sock_array[num & (SOCK_ARRAY_SIZE - 1)];
+
+	while (cur != NULL)
+	{
+		if (cur->num == num)
+			return 1;	/* taken */
+		cur = cur->next;
+	}
+	return 0;		/* free */
+}
+
+
+/*
+ *	Pick a new socket number for 'prot'.  A 'base' of zero, or one not
+ *	above PROT_SOCK, is first pushed above PROT_SOCK using 'start'.
+ */
+unsigned short get_new_socknum(struct proto *prot, unsigned short base)
+{
+	static int start=0;
+
+	/*
+	 * Used to cycle through the port numbers so the
+	 * chances of a confused connection drop.
+	 */
+	 
+	int i, j;
+	int best = 0;		/* offset of the shortest chain seen so far */
+	int size = 32767; /* a big num. */
+	struct sock *sk;
+
+	if (base == 0) 
+		base = PROT_SOCK+1+(start % 1024);
+	if (base <= PROT_SOCK) 
+	{
+		base += PROT_SOCK+(start % 1024);
+	}
+
+	/* Now look through the entire array and try to find an empty ptr. */
+	for(i=0; i < SOCK_ARRAY_SIZE; i++) 
+	{
+		j = 0;		/* j counts the sockets chained in this slot */
+		sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)];
+		while(sk != NULL) 
+		{
+			sk = sk->next;
+			j++;
+		}
+		if (j == 0) 
+		{
+			start =(i+1+start )%1024;
+			return(i+base+1);
+		}
+		if (j < size) 
+		{
+			best = i;
+			size = j;
+		}
+	}
+
+	/* No empty chain was found.  Starting from the shortest one, step by
+	   SOCK_ARRAY_SIZE (staying in that slot) until the number is unused. */
+	while(sk_inuse(prot, base +best+1)) 
+	{
+		best += SOCK_ARRAY_SIZE;
+	}
+	return(best+base+1);
+}
+
+/*
+ *	Add a socket into the socket tables by number.
+ *
+ *	Stores 'num' in sk->num, counts the socket in the protocol's inuse
+ *	and highestinuse figures, and links it into the hash chain chosen
+ *	by num & (SOCK_ARRAY_SIZE - 1).  The caller's interrupt state is
+ *	the same on return as on entry.
+ */
+
+void put_sock(unsigned short num, struct sock *sk)
+{
+	struct sock **skp;
+	struct sock *tmp;
+	int mask;
+	unsigned long flags;
+
+	sk->num = num;
+	sk->next = NULL;
+	num = num &(SOCK_ARRAY_SIZE -1);
+
+	/* We can't have an interrupt re-enter here. */
+	save_flags(flags);
+	cli();
+
+	sk->prot->inuse += 1;
+	if (sk->prot->highestinuse < sk->prot->inuse)
+		sk->prot->highestinuse = sk->prot->inuse;
+
+	if (sk->prot->sock_array[num] == NULL) 
+	{
+		sk->prot->sock_array[num] = sk;
+		restore_flags(flags);
+		return;
+	}
+	restore_flags(flags);
+
+	/*
+	 * Derive a byte-aligned mask from sk->saddr; it decides below
+	 * where in the chain the socket is inserted.
+	 */
+	for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) 
+	{
+		if ((mask & sk->saddr) &&
+		    (mask & sk->saddr) != (mask & 0xffffffff)) 
+		{
+			mask = mask << 8;
+			break;
+		}
+	}
+
+	/*
+	 * Insert in front of the first chain entry whose address has no
+	 * bits under the mask, or at the tail when there is none.  Walking
+	 * with a pointer to the link copes with the chain having emptied
+	 * while interrupts were enabled above, and restore_flags() (not a
+	 * bare sti()) leaves the caller's interrupt state untouched.
+	 */
+	cli();
+	skp = &sk->prot->sock_array[num];
+	while ((tmp = *skp) != NULL && (tmp->saddr & mask))
+		skp = &tmp->next;
+	sk->next = tmp;
+	*skp = sk;
+	restore_flags(flags);
+}
+
+/*
+ *	Remove a socket from the socket tables.
+ *
+ *	Searches the hash chain selected by the socket's number and, if
+ *	sk1 is on it, splices it out and lowers the protocol's inuse
+ *	count.  A socket with no protocol attached is only reported.
+ */
+
+static void remove_sock(struct sock *sk1)
+{
+	struct sock **link;
+	unsigned long flags;
+
+	if (sk1->prot == NULL)
+	{
+		printk("sock.c: remove_sock: sk1->prot == NULL\n");
+		return;
+	}
+
+	/* We can't have this changing out from under us. */
+	save_flags(flags);
+	cli();
+
+	/*
+	 * Follow the links rather than the nodes, so that the head of
+	 * the chain and its interior are handled by the same code.
+	 */
+	link = &sk1->prot->sock_array[sk1->num &(SOCK_ARRAY_SIZE -1)];
+	while (*link != NULL)
+	{
+		if (*link == sk1)
+		{
+			sk1->prot->inuse -= 1;
+			*link = sk1->next;
+			break;
+		}
+		link = &(*link)->next;
+	}
+
+	restore_flags(flags);
+}
+
+/*
+ *	Destroy an AF_INET socket: unhash it, stop its timers, free its
+ *	queued buffers and, once no buffer memory is charged to it, itself.
+ */
+void destroy_sock(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+  	sk->inuse = 1;			/* just to be safe. */
+
+  	/* In case it's sleeping somewhere. */
+  	if (!sk->dead) 
+  		sk->write_space(sk);
+
+  	remove_sock(sk);
+  
+  	/* Now we can no longer get new packets. */
+  	delete_timer(sk);
+  	/* Nor send them */
+	del_timer(&sk->retransmit_timer);
+	
+	while ((skb = tcp_dequeue_partial(sk)) != NULL) {
+		IS_SKB(skb);
+		kfree_skb(skb, FREE_WRITE);
+	}
+
+	/* Clean up the write buffer. */
+  	while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
+		IS_SKB(skb);
+		kfree_skb(skb, FREE_WRITE);
+  	}
+  	
+  	/*
+  	 *	Don't discard received data until the user side kills its
+  	 *	half of the socket.
+  	 */
+
+	if (sk->dead) 
+	{
+  		while((skb=skb_dequeue(&sk->receive_queue))!=NULL) 
+  		{
+		/*
+		 * This will take care of closing sockets that were
+		 * listening and didn't accept everything.
+		 */
+			if (skb->sk != NULL && skb->sk != sk) 
+			{
+				IS_SKB(skb);
+				skb->sk->dead = 1;
+				skb->sk->prot->close(skb->sk, 0);
+			}
+			IS_SKB(skb);
+			kfree_skb(skb, FREE_READ);
+		}
+	}	
+
+	/* Now we need to clean up the send head. */
+	cli();
+	for(skb = sk->send_head; skb != NULL; )
+	{
+		struct sk_buff *skb2;
+
+		/*
+		 * We need to remove skb from the transmit queue,
+		 * or maybe the arp queue.
+		 */
+		if (skb->next  && skb->prev) {
+/*			printk("destroy_sock: unlinked skb\n");*/
+			IS_SKB(skb);
+			skb_unlink(skb);
+		}
+		skb->dev = NULL;
+		skb2 = skb->link3;	/* fetch the successor before skb is freed */
+		kfree_skb(skb, FREE_WRITE);
+		skb = skb2;
+	}
+	sk->send_head = NULL;
+	sti();
+
+  	/* And now the backlog. */
+  	while((skb=skb_dequeue(&sk->back_log))!=NULL) 
+  	{
+		/* this should never happen. */
+/*		printk("cleaning back_log\n");*/
+		kfree_skb(skb, FREE_READ);
+	}
+
+	/* Now if it has a half accepted/ closed socket. */
+	if (sk->pair) 
+	{
+		sk->pair->dead = 1;
+		sk->pair->prot->close(sk->pair, 0);
+		sk->pair = NULL;
+  	}
+
+	/*
+	 * Now if everything is gone we can free the socket
+	 * structure, otherwise we need to keep it around until
+	 * everything is gone.
+	 */
+
+	  if (sk->dead && sk->rmem_alloc == 0 && sk->wmem_alloc == 0) 
+	  {
+		kfree_s((void *)sk,sizeof(*sk));
+	  } 
+	  else 
+	  {
+		/* this should never happen. */
+		/* actually it can if an ack has just been sent. */
+		sk->destroy = 1;	/* flag it and arm the TIME_DESTROY timer instead */
+		sk->ack_backlog = 0;
+		sk->inuse = 0;
+		reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
+  	}
+}
+
+/*
+ *	The routines beyond this point handle the behaviour of an AF_INET
+ *	socket object. Mostly it punts to the subprotocols of IP to do
+ *	the work.
+ */
+ 
+static int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	/* F_GETOWN simply reports the recorded owner. */
+	if (cmd == F_GETOWN)
+		return(sk->proc);
+
+	/* Anything other than F_SETOWN is not handled here. */
+	if (cmd != F_SETOWN)
+		return(-EINVAL);
+
+	/*
+	 * F_SETOWN: unless the caller is the superuser, the new owner must
+	 * be the caller itself (a pid) or its own process group (given as
+	 * a negative value); this is restrictive, but it stops a process
+	 * from arranging for a sigurg to be sent to somebody else.
+	 */
+	if (!suser() && current->pgrp != -arg && current->pid != arg)
+		return(-EPERM);
+
+	sk->proc = arg;
+	return(0);
+}
+
+/*
+ *	Set a socket option: SOL_SOCKET options go to the generic socket
+ *	code, everything else to the protocol if it has a handler.
+ */
+static int inet_setsockopt(struct socket *sock, int level, int optname,
+		    char *optval, int optlen)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	if (level == SOL_SOCKET)
+		return sock_setsockopt(sk,level,optname,optval,optlen);
+	if (sk->prot->setsockopt != NULL)
+		return sk->prot->setsockopt(sk,level,optname,optval,optlen);
+	return(-EOPNOTSUPP);
+}
+
+/*
+ *	Get a socket option: SOL_SOCKET options come from the generic
+ *	socket code, everything else from the protocol if it has a handler.
+ */
+static int inet_getsockopt(struct socket *sock, int level, int optname,
+		    char *optval, int *optlen)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	if (level == SOL_SOCKET)
+		return sock_getsockopt(sk,level,optname,optval,optlen);
+	if (sk->prot->getsockopt != NULL)
+		return sk->prot->getsockopt(sk,level,optname,optval,optlen);
+	return(-EOPNOTSUPP);
+}
+
+/*
+ *	Give an unbound socket (sk->num == 0) a local port number.
+ *	Returns 0 on success or if already bound, -EAGAIN if none was found.
+ */
+static int inet_autobind(struct sock *sk)
+{
+	/* Already bound: nothing to do. */
+	if (sk->num != 0)
+		return 0;
+
+	sk->num = get_new_socknum(sk->prot, 0);
+	if (sk->num == 0)
+		return(-EAGAIN);
+	put_sock(sk->num, sk);
+	sk->dummy_th.source = ntohs(sk->num);
+	return 0;
+}
+
+/*
+ *	Move a socket into listening state: autobind it if needed
+ *	(-EAGAIN on failure), record the backlog limit, enter TCP_LISTEN.
+ */
+static int inet_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	if(inet_autobind(sk)!=0)
+		return -EAGAIN;
+
+	/*
+	 * Note that the backlog is "unsigned char", so truncate it
+	 * somewhere. We might as well truncate it to what everybody
+	 * else does.  The comparison is done unsigned so that a negative
+	 * request is clamped too instead of wrapping to a large limit.
+	 */
+	if ((unsigned) backlog > 5)
+		backlog = 5;
+	sk->max_ack_backlog = backlog;
+	if (sk->state != TCP_LISTEN)
+	{
+		sk->ack_backlog = 0;	/* fresh listener: nothing pending */
+		sk->state = TCP_LISTEN;
+	}
+	return(0);
+}
+
+/*
+ *	Default callbacks for user INET sockets.  Each wakes whoever sleeps
+ *	on the socket, unless the socket is already dead.
+ */
+static void def_callback1(struct sock *sk)
+{
+	if (sk->dead)
+		return;
+	wake_up_interruptible(sk->sleep);
+}
+
+static void def_callback2(struct sock *sk,int len)
+{
+	if (sk->dead)
+		return;
+	/* Wake sleepers and signal async I/O users; 'len' is unused. */
+	wake_up_interruptible(sk->sleep);
+	sock_wake_async(sk->socket, 1);
+}
+
+static void def_callback3(struct sock *sk)
+{
+	if (sk->dead)
+		return;
+	/* Same as def_callback2 but raises async event 2 instead of 1. */
+	wake_up_interruptible(sk->sleep);
+	sock_wake_async(sk->socket, 2);
+}
+
+/*
+ *	Create an inet socket: allocate the sock, pick the protocol from
+ *	sock->type, set defaults and run the protocol's init hook if any.
+ *	FIXME: Gcc would generate much better code if we set the parameters
+ *	up in in-memory structure order. Gcc68K even more so
+ */
+
+static int inet_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct proto *prot;
+	int err;
+
+	sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
+	if (sk == NULL) 
+		return(-ENOBUFS);
+	sk->num = 0;
+	sk->reuse = 0;
+	switch(sock->type) 
+	{
+		case SOCK_STREAM:
+		case SOCK_SEQPACKET:
+			if (protocol && protocol != IPPROTO_TCP) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPROTONOSUPPORT);
+			}
+			protocol = IPPROTO_TCP;
+			sk->no_check = TCP_NO_CHECK;
+			prot = &tcp_prot;
+			break;
+
+		case SOCK_DGRAM:
+			if (protocol && protocol != IPPROTO_UDP) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPROTONOSUPPORT);
+			}
+			protocol = IPPROTO_UDP;
+			sk->no_check = UDP_NO_CHECK;
+			prot=&udp_prot;
+			break;
+      
+		case SOCK_RAW:
+			if (!suser()) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPERM);
+			}
+			if (!protocol) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPROTONOSUPPORT);
+			}
+			prot = &raw_prot;
+			sk->reuse = 1;
+			sk->no_check = 0;	/*
+						 * Doesn't matter no checksum is
+						 * performed anyway.
+						 */
+			sk->num = protocol;	/* hashed by protocol number, see put_sock() below */
+			break;
+
+		case SOCK_PACKET:
+			if (!suser()) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPERM);
+			}
+			if (!protocol) 
+			{
+				kfree_s((void *)sk, sizeof(*sk));
+				return(-EPROTONOSUPPORT);
+			}
+			prot = &packet_prot;
+			sk->reuse = 1;
+			sk->no_check = 0;	/* Doesn't matter no checksum is
+						 * performed anyway.
+						 */
+			sk->num = protocol;
+			break;
+
+		default:
+			kfree_s((void *)sk, sizeof(*sk));
+			return(-ESOCKTNOSUPPORT);
+	}
+	sk->socket = sock;
+#ifdef CONFIG_TCP_NAGLE_OFF
+	sk->nonagle = 1;
+#else    
+	sk->nonagle = 0;
+#endif  
+	sk->type = sock->type;
+	sk->stamp.tv_sec=0;
+	sk->protocol = protocol;
+	sk->wmem_alloc = 0;
+	sk->rmem_alloc = 0;
+	sk->sndbuf = SK_WMEM_MAX;
+	sk->rcvbuf = SK_RMEM_MAX;
+	sk->pair = NULL;
+	sk->opt = NULL;
+	sk->write_seq = 0;
+	sk->acked_seq = 0;
+	sk->copied_seq = 0;
+	sk->fin_seq = 0;
+	sk->urg_seq = 0;
+	sk->urg_data = 0;
+	sk->proc = 0;
+	sk->rtt = 0;				/*TCP_WRITE_TIME << 3;*/
+	sk->rto = TCP_TIMEOUT_INIT;		/*TCP_WRITE_TIME*/
+	sk->mdev = 0;
+	sk->backoff = 0;
+	sk->packets_out = 0;
+	sk->cong_window = 1; /* start with only sending one packet at a time. */
+	sk->cong_count = 0;
+	sk->ssthresh = 0;
+	sk->max_window = 0;
+	sk->urginline = 0;
+	sk->intr = 0;
+	sk->linger = 0;
+	sk->destroy = 0;
+	sk->priority = 1;
+	sk->shutdown = 0;
+	sk->keepopen = 0;
+	sk->zapped = 0;
+	sk->done = 0;
+	sk->ack_backlog = 0;
+	sk->window = 0;
+	sk->bytes_rcv = 0;
+	sk->state = TCP_CLOSE;
+	sk->dead = 0;
+	sk->ack_timed = 0;
+	sk->partial = NULL;
+	sk->user_mss = 0;
+	sk->debug = 0;
+
+	/* this is how many unacked bytes we will accept for this socket.  */
+	sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
+
+	/* how many packets we should send before forcing an ack. 
+	   if this is set to zero it is the same as sk->delay_acks = 0 */
+	sk->max_ack_backlog = 0;
+	sk->inuse = 0;
+	sk->delay_acks = 0;
+	skb_queue_head_init(&sk->write_queue);
+	skb_queue_head_init(&sk->receive_queue);
+	sk->mtu = 576;
+	sk->prot = prot;
+	sk->sleep = sock->wait;
+	sk->daddr = 0;
+	sk->saddr = 0 /* ip_my_addr() */;
+	sk->err = 0;
+	sk->next = NULL;
+	sk->pair = NULL;
+	sk->send_tail = NULL;
+	sk->send_head = NULL;
+	sk->timeout = 0;
+	sk->broadcast = 0;
+	sk->localroute = 0;
+	init_timer(&sk->timer);
+	init_timer(&sk->retransmit_timer);
+	sk->timer.data = (unsigned long)sk;
+	sk->timer.function = &net_timer;
+	skb_queue_head_init(&sk->back_log);
+	sk->blog = 0;
+	sock->data =(void *) sk;
+	sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
+	sk->dummy_th.res1=0;
+	sk->dummy_th.res2=0;
+	sk->dummy_th.urg_ptr = 0;
+	sk->dummy_th.fin = 0;
+	sk->dummy_th.syn = 0;
+	sk->dummy_th.rst = 0;
+	sk->dummy_th.psh = 0;
+	sk->dummy_th.ack = 0;
+	sk->dummy_th.urg = 0;
+	sk->dummy_th.dest = 0;
+	sk->ip_tos=0;
+	sk->ip_ttl=64;
+#ifdef CONFIG_IP_MULTICAST
+	sk->ip_mc_loop=1;
+	sk->ip_mc_ttl=1;
+	*sk->ip_mc_name=0;
+	sk->ip_mc_list=NULL;
+#endif
+  	
+	sk->state_change = def_callback1;
+	sk->data_ready = def_callback2;
+	sk->write_space = def_callback3;
+	sk->error_report = def_callback1;
+
+	if (sk->num) 
+	{
+	/*
+	 * It assumes that any protocol which allows
+	 * the user to assign a number at socket
+	 * creation time automatically
+	 * shares.
+	 */
+		put_sock(sk->num, sk);
+		sk->dummy_th.source = ntohs(sk->num);
+	}
+
+	if (sk->prot->init) 
+	{
+		err = sk->prot->init(sk);
+		if (err != 0) 
+		{
+			destroy_sock(sk);	/* NOTE(review): sock->data still points at sk here */
+			return(err);
+		}
+	}
+	return(0);
+}
+
+
+/*
+ *	Duplicate a socket: create a new one with oldsock's protocol.
+ */
+static int inet_dup(struct socket *newsock, struct socket *oldsock)
+{
+	struct sock *oldsk = (struct sock *) oldsock->data;
+	return(inet_create(newsock, oldsk->protocol));
+}
+
+/*
+ * Return 1 if we still have things to send in our buffers, that is
+ * while the connection is in one of the states TCP_FIN_WAIT1,
+ * TCP_CLOSING or TCP_LAST_ACK, and 0 otherwise.
+ */
+static inline int closing(struct sock * sk)
+{
+	int state = sk->state;
+
+	return (state == TCP_FIN_WAIT1 ||
+		state == TCP_CLOSING ||
+		state == TCP_LAST_ACK);
+}
+
+
+/*
+ *	The peer socket should always be NULL (or else). When we call this
+ *	function we are destroying the object and from then on nobody
+ *	should refer to it.
+ */
+ 
+static int inet_release(struct socket *sock, struct socket *peer)
+{
+	struct sock *sk = (struct sock *) sock->data;
+	if (sk == NULL) 
+		return(0);	/* nothing attached to this socket */
+
+	sk->state_change(sk);	/* run the socket's state-change callback */
+
+	/* Start closing the connection.  This may take a while. */
+
+#ifdef CONFIG_IP_MULTICAST
+	/* Applications forget to leave groups before exiting */
+	ip_mc_drop_socket(sk);
+#endif
+	/*
+	 * If linger is set, we don't return until the close
+	 * is complete.  Other wise we return immediately. The
+	 * actually closing is done the same either way.
+	 *
+	 * If the close is due to the process exiting, we never
+	 * linger..
+	 */
+
+	if (sk->linger == 0 || (current->flags & PF_EXITING))
+	{
+		sk->prot->close(sk,0);
+		sk->dead = 1;
+	} 
+	else 
+	{
+		sk->prot->close(sk, 0);
+		cli();
+		if (sk->lingertime)
+			current->timeout = jiffies + HZ*sk->lingertime;
+		while(closing(sk) && current->timeout>0) 	/* wait while closing() holds and the timeout is unspent */
+		{
+			interruptible_sleep_on(sk->sleep);
+			if (current->signal & ~current->blocked) 
+			{
+				break;	/* an unblocked signal is pending: stop lingering */
+#if 0
+				/* not working now - closes can't be restarted */
+				sti();
+				current->timeout=0;
+				return(-ERESTARTSYS);
+#endif
+			}
+		}
+		current->timeout=0;
+		sti();
+		sk->dead = 1;
+	}
+	sk->inuse = 1;
+
+	/* This will destroy it. */
+	release_sock(sk);
+	sock->data = NULL;
+	sk->socket = NULL;	/* NOTE(review): sk is written after release_sock(); confirm it cannot have been freed */
+	return(0);
+}
+
+
+/*
+ *	Bind a socket to a local address and port.  This needs to be changed
+ *	to disallow the rebinding of sockets.   What error should it return?
+ */
+static int inet_bind(struct socket *sock, struct sockaddr *uaddr,
+	       int addr_len)
+{
+	struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
+	struct sock *sk=(struct sock *)sock->data, *sk2;
+	unsigned short snum = 0 /* Stoopid compiler.. this IS ok */;
+	int chk_addr_ret;
+
+	/* check this error. */
+	if (sk->state != TCP_CLOSE)
+		return(-EIO);
+	if(addr_len<sizeof(struct sockaddr_in))
+		return -EINVAL;
+		
+	if(sock->type != SOCK_RAW)
+	{
+		if (sk->num != 0) 
+			return(-EINVAL);	/* already has a number */
+
+		snum = ntohs(addr->sin_port);
+
+		/*
+		 * We can't just leave the socket bound wherever it is, it might
+		 * be bound to a privileged port. However, since there seems to
+		 * be a bug here, we will leave it if the port is not privileged.
+		 */
+		if (snum == 0) 
+		{
+			snum = get_new_socknum(sk->prot, 0);
+		}
+		if (snum < PROT_SOCK && !suser()) 
+			return(-EACCES);
+	}
+	
+	chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr);
+	if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST)
+		return(-EADDRNOTAVAIL);	/* Source address MUST be ours! */
+	  	
+	if (chk_addr_ret || addr->sin_addr.s_addr == 0)
+		sk->saddr = addr->sin_addr.s_addr;	/* stored before the port check below, so it stays set on -EADDRINUSE */
+	
+	if(sock->type != SOCK_RAW)
+	{
+		/* Make sure we are allowed to bind here. */
+		cli();
+		for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)];
+					sk2 != NULL; sk2 = sk2->next) 
+		{
+		/* should be below! */
+			if (sk2->num != snum) 
+				continue;
+			if (!sk->reuse)
+			{
+				sti();
+				return(-EADDRINUSE);
+			}
+			/* NOTE: the next test repeats the one at the top of the loop, so it can never be true here. */
+			if (sk2->num != snum) 
+				continue;		/* more than one */
+			if (sk2->saddr != sk->saddr) 
+				continue;	/* socket per slot ! -FB */
+			if (!sk2->reuse || sk2->state==TCP_LISTEN) 
+			{
+				sti();
+				return(-EADDRINUSE);
+			}
+		}
+		sti();
+
+		remove_sock(sk);
+		put_sock(snum, sk);
+		sk->dummy_th.source = ntohs(sk->num);
+		sk->daddr = 0;
+		sk->dummy_th.dest = 0;
+	}
+	return(0);
+}
+
+/*
+ *	Fetch and clear the pending error on a socket, returning it negated.
+ *	Interrupts are disabled while sk->err is read and reset.
+ */
+static int inet_error(struct sock *sk)
+{
+	unsigned long flags;
+	int pending;
+	save_flags(flags);
+	cli();
+	pending = sk->err;
+	sk->err = 0;
+	restore_flags(flags);
+	return -pending;
+}
+
+/*
+ *	Connect to a remote host. There is regrettably still a little
+ *	TCP 'magic' in here.
+ *
+ *	Returns 0 once connected; -EINPROGRESS or -EALREADY for a
+ *	non-blocking connect still under way; -ERESTARTSYS when a signal
+ *	interrupts the wait; otherwise a negative error.  Pending socket
+ *	errors are always read and cleared through inet_error(), so the
+ *	read-and-reset of sk->err is never done with interrupts enabled.
+ */
+static int inet_connect(struct socket *sock, struct sockaddr * uaddr,
+		  int addr_len, int flags)
+{
+	struct sock *sk=(struct sock *)sock->data;
+	int err;
+	sock->conn = NULL;
+
+	if (sock->state == SS_CONNECTING && tcp_connected(sk->state))
+	{
+		sock->state = SS_CONNECTED;
+		/* Connection completing after a connect/EINPROGRESS/select/connect */
+		return 0;	/* Rock and roll */
+	}
+
+	if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK))
+		return -EALREADY;	/* Connecting is currently in progress */
+  	
+	if (sock->state != SS_CONNECTING) 
+	{
+		/* We may need to bind the socket. */
+		if(inet_autobind(sk)!=0)
+			return(-EAGAIN);
+		if (sk->prot->connect == NULL) 
+			return(-EOPNOTSUPP);
+		err = sk->prot->connect(sk, (struct sockaddr_in *)uaddr, addr_len);
+		if (err < 0) 
+			return(err);
+  		sock->state = SS_CONNECTING;
+	}
+	
+	/* The attempt has already failed: report why and allow a retry. */
+	if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING)
+	{
+		sock->state=SS_UNCONNECTED;
+		return inet_error(sk);
+	}
+
+	if (sk->state != TCP_ESTABLISHED &&(flags & O_NONBLOCK)) 
+	  	return(-EINPROGRESS);
+
+	cli(); /* avoid the race condition */
+	while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
+	{
+		interruptible_sleep_on(sk->sleep);
+		if (current->signal & ~current->blocked) 
+		{
+			sti();
+			return(-ERESTARTSYS);
+		}
+		/* This fixes a nasty in the tcp/ip code. There is a hideous hassle with
+		   icmp error packets wanting to close a tcp or udp socket. */
+		if(sk->err && sk->protocol == IPPROTO_TCP)
+		{
+			sock->state = SS_UNCONNECTED;
+			/* set by tcp_err(); fetch it before interrupts come back on */
+			err = inet_error(sk);
+			sti();
+			return err;
+		}
+	}
+	sti();
+	sock->state = SS_CONNECTED;
+
+	if (sk->state != TCP_ESTABLISHED && sk->err) 
+	{
+		sock->state = SS_UNCONNECTED;
+		return inet_error(sk);
+	}
+	return(0);
+}
+
+
+static int inet_socketpair(struct socket *sock1, struct socket *sock2)
+{
+	 return(-EOPNOTSUPP);	/* both arguments are ignored: always refused */
+}
+
+
+/*
+ *	Accept a pending connection. The TCP layer now gives BSD semantics.
+ */
+
+static int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk1, *sk2;	/* sk1: the accepting socket, sk2: the new connection */
+	int err;
+
+	sk1 = (struct sock *) sock->data;
+
+	/*
+	 * We've been passed an extra socket.
+	 * We need to free it up because the tcp module creates
+	 * its own when it accepts one.
+	 */
+	if (newsock->data)
+	{
+	  	struct sock *sk=(struct sock *)newsock->data;
+	  	newsock->data=NULL;
+	  	sk->dead = 1;
+	  	destroy_sock(sk);
+	}
+	/* Not every protocol provides an accept operation. */
+	if (sk1->prot->accept == NULL) 
+		return(-EOPNOTSUPP);
+
+	/* Restore the state if we have been interrupted, and then returned. */
+	if (sk1->pair != NULL ) 
+	{
+		sk2 = sk1->pair;
+		sk1->pair = NULL;
+	} 
+	else
+	{
+		sk2 = sk1->prot->accept(sk1,flags);
+		if (sk2 == NULL) 
+		{
+			if (sk1->err <= 0)
+				printk("Warning sock.c:sk1->err <= 0.  Returning non-error.\n");
+			err=sk1->err;
+			sk1->err=0;
+			return(-err);
+		}
+	}
+	newsock->data = (void *)sk2;
+	sk2->sleep = newsock->wait;
+	sk2->socket = newsock;
+	newsock->conn = NULL;
+	if (flags & O_NONBLOCK) 
+		return(0);	/* returns without setting newsock->state */
+	/* Blocking accept: wait for the new connection to leave TCP_SYN_RECV. */
+	cli(); /* avoid the race. */
+	while(sk2->state == TCP_SYN_RECV) 
+	{
+		interruptible_sleep_on(sk2->sleep);
+		if (current->signal & ~current->blocked) 
+		{
+			sti();
+			sk1->pair = sk2;	/* parked here; picked up again by the sk1->pair test above */
+			sk2->sleep = NULL;
+			sk2->socket=NULL;
+			newsock->data = NULL;
+			return(-ERESTARTSYS);
+		}
+	}
+	sti();
+
+	if (sk2->state != TCP_ESTABLISHED && sk2->err > 0) 
+	{
+		err = -sk2->err;
+		sk2->err=0;
+		sk2->dead=1;	/* ANK */
+		destroy_sock(sk2);
+		newsock->data = NULL;
+		return(err);
+	}
+	newsock->state = SS_CONNECTED;
+	return(0);
+}
+
+
+/*
+ *	This does both peername and sockname: it fills in the address of
+ *	the peer when 'peer' is non-zero (the socket must be connected),
+ *	and our own address otherwise.
+ */
+static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
+		 int *uaddr_len, int peer)
+{
+	struct sock *sk = (struct sock *) sock->data;
+	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
+
+	sin->sin_family = AF_INET;
+
+	if (!peer)
+	{
+		/* Local name; a zero saddr is reported as ip_my_addr(). */
+		sin->sin_port = sk->dummy_th.source;
+		sin->sin_addr.s_addr = sk->saddr ? sk->saddr : ip_my_addr();
+	}
+	else
+	{
+		if (!tcp_connected(sk->state))
+			return(-ENOTCONN);
+		sin->sin_port = sk->dummy_th.dest;
+		sin->sin_addr.s_addr = sk->daddr;
+	}
+
+	*uaddr_len = sizeof(*sin);
+	return(0);
+}
+
+
+/*
+ *	The assorted BSD I/O operations
+ */
+
+static int inet_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, 
+		   unsigned flags, struct sockaddr *sin, int *addr_len )
+{
+	struct sock *sk = (struct sock *) sock->data;
+	/* Refuse early if the protocol cannot receive this way. */
+	if (sk->prot->recvfrom == NULL)
+		return(-EOPNOTSUPP);
+	/* A pending socket error takes precedence over reading data. */
+	if (sk->err != 0)
+		return inet_error(sk);
+	if (inet_autobind(sk) != 0)	/* we may need to bind the socket */
+		return(-EAGAIN);
+	return sk->prot->recvfrom(sk, (unsigned char *) ubuf, size, noblock,
+				  flags, (struct sockaddr_in *) sin, addr_len);
+}
+
+
+static int inet_recv(struct socket *sock, void *ubuf, int size, int noblock,
+	  unsigned flags)
+{
+	/* BSD explicitly states these are the same, so recv() is recvfrom() with no address wanted back */
+	return inet_recvfrom(sock,ubuf,size,noblock,flags,NULL,NULL);
+}
+
+static int inet_read(struct socket *sock, char *ubuf, int size, int noblock)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	/* Report a pending error before anything else. */
+	if (sk->err != 0)
+		return inet_error(sk);
+	if (inet_autobind(sk) != 0)	/* we may need to bind the socket */
+		return(-EAGAIN);
+	return sk->prot->read(sk, (unsigned char *) ubuf, size, noblock, 0);
+}
+
+static int inet_send(struct socket *sock, void *ubuf, int size, int noblock, 
+	       unsigned flags)
+{
+	struct sock *sk = (struct sock *) sock->data;
+	/* Sending after a send-side shutdown raises SIGPIPE and fails. */
+	if ((sk->shutdown & SEND_SHUTDOWN) != 0)
+	{
+		send_sig(SIGPIPE, current, 1);
+		return(-EPIPE);
+	}
+	if (sk->err != 0)
+		return inet_error(sk);
+	if (inet_autobind(sk) != 0)	/* we may need to bind the socket */
+		return(-EAGAIN);
+	return sk->prot->write(sk, (unsigned char *) ubuf, size, noblock, flags);
+}
+
+static int inet_write(struct socket *sock, char *ubuf, int size, int noblock)
+{
+	return inet_send(sock,ubuf,size,noblock,0);	/* write() is send() with flags of 0 */
+}
+
+static int inet_sendto(struct socket *sock, void *ubuf, int size, int noblock, 
+	    unsigned flags, struct sockaddr *sin, int addr_len)
+{
+	struct sock *sk = (struct sock *) sock->data;
+	/* Sending after a send-side shutdown raises SIGPIPE and fails. */
+	if ((sk->shutdown & SEND_SHUTDOWN) != 0)
+	{
+		send_sig(SIGPIPE, current, 1);
+		return(-EPIPE);
+	}
+	if (sk->prot->sendto == NULL)
+		return(-EOPNOTSUPP);
+	if (sk->err != 0)
+		return inet_error(sk);
+	if (inet_autobind(sk) != 0)	/* we may need to bind the socket */
+		return -EAGAIN;
+	return sk->prot->sendto(sk, (unsigned char *) ubuf, size, noblock,
+				flags, (struct sockaddr_in *) sin, addr_len);
+}
+
+
+static int inet_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	/*
+	 * This should really check to make sure
+	 * the socket is a TCP socket. (WHY AC...)
+	 * Adding one maps 'how' 0,1,2 onto 1,2,3, i.e. a receive bit and
+	 * a send bit; 0 (from -1) or bits outside SHUTDOWN_MASK are invalid.
+	 */
+	how += 1;
+	if (how == 0 || (how & ~SHUTDOWN_MASK) != 0)
+		return(-EINVAL);
+	if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
+		sock->state = SS_CONNECTED;
+	if (!tcp_connected(sk->state))
+		return(-ENOTCONN);
+	sk->shutdown |= how;
+	if (sk->prot->shutdown != NULL)
+		sk->prot->shutdown(sk, how);
+	return(0);
+}
+
+
+static int inet_select(struct socket *sock, int sel_type, select_table *wait )
+{
+	struct sock *sk = (struct sock *) sock->data;
+
+	/* A protocol without a select handler always yields 0. */
+	if (sk->prot->select != NULL)
+		return(sk->prot->select(sk, sel_type, wait));
+	return(0);
+}
+
+/*
+ *	ioctl() calls you can issue on an INET socket. Most of these are
+ *	device configuration and stuff and very rarely used. Some ioctls
+ *	pass on to the socket itself.
+ *	FIOSETOWN/SIOCSPGRP obey the same ownership rule as F_SETOWN.
+ *	NOTE: I like the idea of a module for the config stuff. ie ifconfig
+ *	loads the devconfigure module does its configuring and unloads it.
+ *	There's a good 20K of config code hanging around the kernel.
+ */
+
+static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk=(struct sock *)sock->data;
+	int err;
+	long pid;
+
+	switch(cmd) 
+	{
+		case FIOSETOWN:
+		case SIOCSPGRP:
+			err=verify_area(VERIFY_READ,(int *)arg,sizeof(long));
+			if(err)
+				return err;
+			pid = get_fs_long((int *) arg);
+			/* Same rule as F_SETOWN in inet_fcntl(): only yourself or your group. */
+			if (!suser() && current->pgrp != -pid && current->pid != pid)
+				return(-EPERM);
+			sk->proc = pid;
+			return(0);
+		case FIOGETOWN:
+		case SIOCGPGRP:
+			err=verify_area(VERIFY_WRITE,(void *) arg, sizeof(long));
+			if(err)
+				return err;
+			put_fs_long(sk->proc,(int *)arg);
+			return(0);			
+		case SIOCGSTAMP:
+			if(sk->stamp.tv_sec==0)
+				return -ENOENT;
+			err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval));
+			if(err)
+				return err;
+			memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval));
+			return 0;
+		case SIOCADDRT: case SIOCADDRTOLD:
+		case SIOCDELRT: case SIOCDELRTOLD:
+			return(ip_rt_ioctl(cmd,(void *) arg));
+		case SIOCDARP:
+		case SIOCGARP:
+		case SIOCSARP:
+			return(arp_ioctl(cmd,(void *) arg));
+#ifdef CONFIG_INET_RARP			
+		case SIOCDRARP:
+		case SIOCGRARP:
+		case SIOCSRARP:
+			return(rarp_ioctl(cmd,(void *) arg));
+#endif
+		case SIOCGIFCONF:
+		case SIOCGIFFLAGS:
+		case SIOCSIFFLAGS:
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCADDMULTI:	/* multicast support change */
+		case SIOCDELMULTI:	/* multicast support change */
+		case SIOCGIFDSTADDR:
+		case SIOCSIFDSTADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCGIFNETMASK:
+		case SIOCSIFNETMASK:
+		case SIOCGIFMETRIC:
+		case SIOCSIFMETRIC:
+		case SIOCGIFMEM:
+		case SIOCSIFMEM:
+		case SIOCGIFMTU:
+		case SIOCSIFMTU:
+		case SIOCSIFLINK:
+		case SIOCGIFHWADDR:
+		case SIOCSIFHWADDR:
+		case OLD_SIOCGIFHWADDR:
+		case SIOCSIFMAP:
+		case SIOCGIFMAP:
+		case SIOCSIFSLAVE:
+		case SIOCGIFSLAVE:
+			return(dev_ioctl(cmd,(void *) arg));
+		default:
+			if ((cmd >= SIOCDEVPRIVATE) &&
+			   (cmd <= (SIOCDEVPRIVATE + 15)))
+				return(dev_ioctl(cmd,(void *) arg));
+
+			if (sk->prot->ioctl==NULL) 
+				return(-EINVAL);
+			return(sk->prot->ioctl(sk, cmd, arg));
+	}
+	/*NOTREACHED*/
+	return(0);
+}
+
+/*
+ * This routine must find a socket given a TCP or UDP header.
+ * Everything is assumed to be in net order.
+ *
+ * We give priority to more closely bound ports: if some socket
+ * is bound to a particular foreign address, it will get the packet
+ * rather than somebody listening to any address..
+ */
+
+struct sock *get_sock(struct proto *prot, unsigned short num,
+				unsigned long raddr,
+				unsigned short rnum, unsigned long laddr)
+{
+	struct sock *cand;
+	struct sock *best = NULL;
+	int best_score = -1;
+	unsigned short hnum = ntohs(num);
+
+	/*
+	 * SOCK_ARRAY_SIZE must be a power of two.  This will work better
+	 * than a prime unless 3 or more sockets end up using the same
+	 * array entry.  This should not be a problem because most
+	 * well known sockets don't overlap that much, and for
+	 * the other ones, we can just be careful about picking our
+	 * socket number when we choose an arbitrary one.
+	 */
+	cand = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
+	for (; cand != NULL; cand = cand->next)
+	{
+		int score = 0;
+		/* Wrong port, or a dead socket that is already closed. */
+		if (cand->num != hnum)
+			continue;
+		if (cand->dead && cand->state == TCP_CLOSE)
+			continue;
+		/*
+		 * Local address, remote address and remote port: each is
+		 * either unset (no points) or must match (one point).
+		 */
+		if (cand->saddr != 0)
+		{
+			if (cand->saddr != laddr)
+				continue;
+			score++;
+		}
+		if (cand->daddr != 0)
+		{
+			if (cand->daddr != raddr)
+				continue;
+			score++;
+		}
+		if (cand->dummy_th.dest != 0)
+		{
+			if (cand->dummy_th.dest != rnum)
+				continue;
+			score++;
+		}
+		/* A fully specified match cannot be beaten. */
+		if (score == 3)
+			return cand;
+		/* Otherwise keep the first socket with the highest score. */
+		if (score > best_score)
+		{
+			best = cand;
+			best_score = score;
+		}
+	}
+	return best;
+}
+
+/*
+ *	Deliver a datagram to raw sockets: starting at 'sk', return the
+ *	first socket on the chain that accepts it, or NULL.
+ */
+struct sock *get_sock_raw(struct sock *sk, 
+				unsigned short num,
+				unsigned long raddr,
+				unsigned long laddr)
+{
+	struct sock *s = sk;
+
+	while (s != NULL)
+	{
+		/*
+		 * Take the socket if the number matches, it is not a dead
+		 * closed socket, and each address it has set agrees.
+		 */
+		if (s->num == num &&
+		    !(s->dead && (s->state == TCP_CLOSE)) &&
+		    !(s->daddr && s->daddr != raddr) &&
+		    !(s->saddr && s->saddr != laddr))
+			return(s);
+		s = s->next;
+	}
+
+	return(NULL);
+}
+
+#ifdef CONFIG_IP_MULTICAST
+/*
+ *	Deliver a datagram to broadcast/multicast sockets.
+ */
+ 
+struct sock *get_sock_mcast(struct sock *sk, 
+				unsigned short num,
+				unsigned long raddr,
+				unsigned short rnum, unsigned long laddr)
+{
+	struct sock *s;
+	unsigned short hnum = ntohs(num);
+
+	/*
+	 * Scan the chain from 'sk' onwards and hand back the first
+	 * socket that passes every test below.  Address and port
+	 * fields that are unset on a socket match anything.  NULL
+	 * means no (further) socket on the chain wants the datagram.
+	 * The caller chooses where on the chain to start.
+	 */
+	for (s = sk; s != NULL; s = s->next)
+	{
+		/* must carry the port, compared in host order */
+		if (s->num != hnum)
+			continue;
+		/* skip a dead socket that has reached TCP_CLOSE */
+		if (s->dead && (s->state == TCP_CLOSE))
+			continue;
+		/* remote address, if the socket has one */
+		if (s->daddr != 0 && s->daddr != raddr)
+			continue;
+		/* remote port, if the socket has one */
+		if (s->dummy_th.dest != 0 && s->dummy_th.dest != rnum)
+			continue;
+		/* local address, if the socket has one */
+		if (s->saddr != 0 && s->saddr != laddr)
+			continue;
+		return(s);
+	}
+
+	return(NULL);
+}
+
+#endif
+
+static struct proto_ops inet_proto_ops = {
+	AF_INET,		/* family, as passed to sock_register() */
+	/* Positional initialiser: the entries must stay in struct proto_ops order. */
+	inet_create,
+	inet_dup,
+	inet_release,
+	inet_bind,
+	inet_connect,
+	inet_socketpair,
+	inet_accept,
+	inet_getname, 
+	inet_read,
+	inet_write,
+	inet_select,
+	inet_ioctl,
+	inet_listen,
+	inet_send,
+	inet_recv,
+	inet_sendto,
+	inet_recvfrom,
+	inet_shutdown,
+	inet_setsockopt,
+	inet_getsockopt,
+	inet_fcntl,
+};
+
+extern unsigned long seq_offset;
+
+/*
+ *	Called by socket.c on kernel startup: registers the AF_INET
+ *	operations, clears the socket tables and starts the protocols.
+ */
+void inet_proto_init(struct net_proto *pro)
+{
+	struct inet_protocol *p;
+	int i;
+
+
+	printk("Swansea University Computer Society TCP/IP for NET3.019\n");
+
+	/*
+	 *	Tell SOCKET that we are alive... 
+	 */
+   
+  	(void) sock_register(inet_proto_ops.family, &inet_proto_ops);
+
+  	seq_offset = CURRENT_TIME*250;
+
+	/*
+	 *	Add all the protocols. 
+	 */
+	 
+	for(i = 0; i < SOCK_ARRAY_SIZE; i++) 
+	{
+		tcp_prot.sock_array[i] = NULL;
+		udp_prot.sock_array[i] = NULL;
+		raw_prot.sock_array[i] = NULL;	/* NOTE(review): packet_prot's table is not cleared here */
+  	}
+	tcp_prot.inuse = 0;
+	tcp_prot.highestinuse = 0;
+	udp_prot.inuse = 0;
+	udp_prot.highestinuse = 0;
+	raw_prot.inuse = 0;
+	raw_prot.highestinuse = 0;
+
+	printk("IP Protocols: ");
+	for(p = inet_protocol_base; p != NULL;) 
+	{
+		struct inet_protocol *tmp = (struct inet_protocol *) p->next;	/* read the successor before inet_add_protocol(p) runs */
+		inet_add_protocol(p);
+		printk("%s%s",p->name,tmp?", ":"\n");
+		p = tmp;
+	}
+	/*
+	 *	Set the ARP module up
+	 */
+	arp_init();
+  	/*
+  	 *	Set the IP module up
+  	 */
+	ip_init();
+}
+
-- 
cgit v1.2.3