diff options
Diffstat (limited to 'pfinet')
107 files changed, 28334 insertions, 0 deletions
diff --git a/pfinet/ChangeLog b/pfinet/ChangeLog new file mode 100644 index 00000000..255dd7b4 --- /dev/null +++ b/pfinet/ChangeLog @@ -0,0 +1,410 @@ +2000-01-27 Roland McGrath <roland@baalperazim.frob.com> + + * mapped-time.h: Include <maptime.h>. + (fill_timeval): Function removed. + (fetch_jiffies): Use maptime_read. + * timer-emul.c (init_time): Use maptime_map and maptime_read. + * linux/sched.h (fetch_xtime): Use maptime_read. + * linux/time.h (do_gettimeofday): Likewise. + +1999-10-22 Roland McGrath <roland@baalperazim.frob.com> + + * socket-ops.c (S_socket_getopt): Implement the call. + All options supported by the code in linux-inet are in + fact of type int, so we can support just that one size. + (This is still a bogus untyped interface!) + +1999-09-13 Roland McGrath <roland@baalperazim.frob.com> + + * io-ops.c: Reverted changes related to io_map_segment. + +1999-09-09 Thomas Bushnell, BSG <tb@mit.edu> + + * ethernet.c (ethernet_open): Don't start the input work thread + here. + (more_packets, mark_bh, input_work_thread): Move + these to ... + * devices.c: ... here. + Also include "pfinet.h" instead of list of <> includes. + * main.c (main): Launch input work thread here. + * pfinet.h (input_work_thread): Declare function. + + * devices.c (add_device): Remove unused function. + * pfinet.h (incoming_net_packet): Remove unused declaration. + +1999-09-07 Thomas Bushnell, BSG <tb@mit.edu> + + * io-ops.c (S_io_map_segment): New function. + +1999-07-11 Roland McGrath <roland@baalperazim.frob.com> + + * pfinet.h: Add #include <sys/mman.h> for munmap decl. + +1999-07-10 Roland McGrath <roland@baalperazim.frob.com> + + * io-ops.c: Add #include <sys/mman.h> for munmap decl. + +1999-07-09 Thomas Bushnell, BSG <tb@mit.edu> + + * io-ops.c (S_io_read): Use mmap instead of vm_allocate. + * socket-ops.c (S_socket_whatis_address): Likewise. + +1999-07-03 Thomas Bushnell, BSG <tb@mit.edu> + + * io-ops.c (S_io_read): Use munmap instead of vm_deallocate. 
+ (S_io_reauthenticate): Likewise. + +1999-06-13 Roland McGrath <roland@baalperazim.frob.com> + + * main.c (trivfs_goaway): Exit if there are no socket ports. + +Sun Mar 14 18:33:54 1999 Thomas Bushnell, BSG <tb@mit.edu> + + * ethernet.c (ether_filter): Use a shorter filter program. + (ether_filter_len): Compute more safely. + +1999-02-28 Roland McGrath <roland@baalperazim.frob.com> + + * main.c (S_startup_dosync): Use ports_class_iterate. + (sigterm_handler): Likewise. + +Tue Feb 16 05:50:27 1999 Thomas Bushnell, BSG <tb@mit.edu> + + * io-ops.c (S_io_revoke): New function. Don't attempt to + implement. (Other systems don't even permit non-file revokes.) + +1998-10-24 Roland McGrath <roland@baalperazim.frob.com> + + * ethernet.c (ethernet_open): Check error return from device_open and + device_set_filter. + (setup_ethernet_device): Check error return from device_get_status. + + * ethernet.c (ethername): Remove static variable. + (setup_ethernet_device): Don't set it. + (ethernet_open): Don't use it; use DEV->name instead. + +1998-09-04 Roland McGrath <roland@baalperazim.frob.com> + + * loopback.c (loopback_type_trans): Return u_int16_t. + +Fri Apr 24 15:43:07 1998 Thomas Bushnell, n/BSG <tb@mit.edu> + + * io-ops.c (S_io_select): When returning EINTR after cancellation, + deallocate REPLY because the stub won't do it for us. Reported by + UCHIYAMA Yasushi (uch@nop.or.jp). + +1997-08-25 Miles Bader <miles@gnu.ai.mit.edu> + + * io-ops.c (S_io_write): Negate linux error codes before returning. + +1997-08-19 Miles Bader <miles@gnu.ai.mit.edu> + + * linux-inet/tcp.c (tcp_conn_request): SK->err holds errno values, + not linux negative errno return vals. + +Wed Aug 20 14:05:52 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * main.c (main): New args for + ports_manage_port_operations_multithread. 
+ +Fri Aug 8 11:47:08 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * linux-inet/tcp.c (tcp_check): Move to end of file so it isn't + inlined and can be profiled easily. + * linux-inet/udp.c (udp_check): Likewise. + +Mon Jul 21 14:58:13 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): Add iohelp. + + * main.c (trivfs_goaway): If FSYS_GOAWAY_FORCE is set, then go + away, otherwise, continue to return EBUSY. + +Fri Apr 25 10:46:49 1997 Miles Bader <miles@gnu.ai.mit.edu> + + * options.c (trivfs_append_args): Renamed from trivfs_get_options. + Don't initialize ARGZ/ARGZ_LEN. + +Tue Feb 4 16:53:19 1997 Miles Bader <miles@gnu.ai.mit.edu> + + * socket-ops.c (S_socket_send): Don't deref ADDR if it's 0. + (S_socket_bind): Return EADDRNOTAVAIL if ADDR is 0. + +Fri Oct 25 20:02:17 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * loopback.c (loopback_type_trans): New function. + (setup_loopback_device): Bother to mark interface up + too. Set LOOPBACK_DEV.type_trans. + +Thu Oct 24 22:38:55 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * main.c (main): Call setup_loopback_device before parsing args + (and thus before the ethernet device shows up). + * loopback.c: New file. + * Makefile (SRCS): Add loopback.c. + * pfinet.h (loopback_dev): New variable. + + * pfinet.h (ip_rt_del): New prototype. + +Thu Sep 12 16:47:24 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (HURDLIBS): New variable. + (pfinet): Omit special dependency. + + * Makefile (vpath %.c): Put this after Makeconf inclusion so it + catches setting of $(srcdir) there. + +Fri Sep 6 16:46:43 1996 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * socket-ops.c (S_socket_recv): Bother to pass + USER->sock->userflags through to the recvfrom routine. + +Sat Jul 20 15:48:29 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (lndist-asm-files): Look for files in $(srcdir). + (lndist-linux-files): Likewise. 
+ (lndist-linux-inet-files): Likewise. + +Fri Jul 19 09:39:08 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * pfinet.h (make_sock_user): Declaration updated. + +Thu Jul 18 23:19:07 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * misc.c (make_sock_user): New parm `noinstall'; if it's set use + non-installing version of ports_create_port. All callers + changed. + * io-ops.c (S_io_reauthenticate): Install NEWUSER port right into + portset after it's fully initialized. + +Sat Jul 13 20:18:18 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * io-ops.c (S_io_reauthenticate): Repeat auth_server_authenticate + for as long as we get EINTR. Deal with other errors without + crashing. + +Sun Jul 7 21:29:42 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * io-ops.c (S_io_reauthenticate): Don't use unsafe MOVE_SEND in + call to auth_server_authenticate. + +Fri Jul 5 19:42:54 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * ethernet.c (setup_ethernet_device): Linux's device `mtu' member + does not include the hardware header size; subtract that off the + value the kernel specifies as maximum packet size in setting + ETHER_DEV.mtu. + +Thu Jun 27 10:14:10 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (lndist): Add lndist-asm-files. + (lndist-asm-files): New rule. + (ASMHEADERS): New variable. + ($(top_srcdir)/hurd-snap/$(dir)/asm): New rule. + +Tue Jun 25 14:00:07 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * linux-inet/route.c (ip_rt_del): New function. + * linux-inet/route.h (ip_rt_del): New declaration. + + * options.c (trivfs_get_options): Initialize *ARGZ & *ARGZ_LEN. + (parse_opt): Fix test for address-less netmask. + Fix byte order when using IN_* macros. + Base default netmask on dev->pa_addr, not in->address. + Don't clear dev->pa_addr. + Delete old routing entries before adding new ones. + (trivfs_get_options): Add & use ADD_ADDR_OPT macro. + (parse_hook_add_interface): Initialize H->curint->device. 
+ +Mon Jun 24 16:48:51 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (sigterm_handler): Renamed from sighup_handle. Make void. + Deal with SIGTERM instead of SIGHUP. + (main): Use SIGTERM & sigterm_handler instead of SIGHUP &c. + + * options.c (parse_hook_add_interface): Realloc the correct number + of bytes. + +Mon Jun 24 16:33:55 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * main.c (sighup_handle): New function. + (arrange_shutdown_notification): Register SIGHUP handler. + +Mon Jun 24 12:44:17 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * options.c (parse_opt): Don't fail if there were no options. + (RETURN): New macro. + (PERR, FAIL): Use RETURN instead of return to free memory if nec. + +Fri Jun 21 16:42:29 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Pass ARGC & ARGV to argp_parse in the correct order. + +Thu Jun 20 22:39:17 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * options.c: Renamed from opts.c. + (parse_opt): Fix lots of typos. + (trivfs_get_options): New function. + (get_opts, trivfs_S_fsys_get_options, trivfs_S_file_get_fs_options): + Functions removed. + <netinet/in.h>, <arpa/inet.h>: New includes. + * Makefile (pfinet): Depend on ../libfshelp/libfshelp.a & + ../libshouldbeinlibc/libshouldbeinlibc.a. + (SRCS): Add options.c. + +Thu Jun 20 19:47:40 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * main.c: Include <hurd/startup.h> and <string.h>. + (shutdown_notify_class): Declare variable. + (S_startup_dosync): Give enough args to ports_lookup_port. + (S_startup_dosync/do1): Return a value. + (arrange_shutdown_notification): Correct spelling of + ports_create_port. Initialize shutdown_notify_class. + (find_device): Construct arg to strcmp correctly. Correct + spelling of ENXIO. + +Wed Jun 19 18:46:35 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * opts.c (trivfs_S_fsys_set_options): Function removed. + (trivfs_runtime_argp): New variable. 
+ +Sun Jun 16 22:45:54 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * opts.c (parse_opt): Rename ERR macro to FAIL. Use argp_failure. + +Sat Jun 15 19:47:06 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (main): Rearrange for arg parsing some more. + (enumerate_devices): New function. + (pfinet_argp): New declaration. + (already_open): Make global. + <error.h>, <argp.h>: New includes. + +Fri Jun 14 15:41:57 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * main.c (find_device): New function. + (main): Rearrange to use new argument parsing. + +Thu Jun 13 16:55:42 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (MIGSRCS): Add startup_notifyServer.c. + * main.c (S_startup_dosync, arrange_shutdown_notification): New + functions. + (pfinet_demuxer): Call startup_notify_server. + (main): Call arrange_shutdown_notification). + +Tue May 14 14:12:25 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * main.c (main): Initialize fsys_identity. + * misc.c (sock_alloc): Initialize SOCK->identity. + (sock_release): Destroy SOCK->identity if it's been set. + * io-ops.c (S_io_identity): New function. + * pfinet.h (fsys_identity): New variable. + * linux/net.h (struct socket) [_HURD_]: New member `identity'. + + * ethernet.c (ethernet_open): Delete superfluous arg to + assert_perror. + +Fri May 10 16:56:52 1996 Miles Bader <miles@gnu.ai.mit.edu> + + * misc.c (make_sock_user): Pass correct args to ports_create_port. + + * ethernet.c (ethernet_open): Pass in ERRNO to assert_perror. + +Thu May 9 20:27:28 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * io-ops.c (S_io_reauthenticate): Use new auth_server_authenticate + interface. + + * io-ops.c (S_io_select): Drop ID_TAG arg. + + * ethernet.c (ethernet_thread): Return any_t. + (input_work_thread): Likewise. + + * ethernet.c (ethernet_open): Use new ports_create_port call. + * socket-ops.c (S_socket_create_address): Likewise. + * misc.c (make_sock_user): Likewise. + (make_sockaddr_port): Likewise. 
+ +Wed Apr 24 18:35:50 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu> + + * main.c (main): Take third cmdline arg and add gateway route. + +Mon Apr 15 12:53:24 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * Makefile (CPPFLAGS): Look for config.h in $(srcdir). + (io-MIGSFLAGS, socket-MIGSFLAGS): Look for mutations.h in + $(srcdir). + (vpath %.c): Find linux-inet dir under $(srcdir). + (SRCS): Add time.c. + (LCLHDRS, LINUXHDRS, FROBBEDLINUXHEADERS): New variables. + (lndist, lndist-linux-inet-files, lndist-linux-files, + $(top-srcdir)/hurd-snap/$(dir)/linux-inet, + $(top-srcdir)/hurd-snap/$(dir)/linux): New targets. + +Mon Feb 26 13:36:14 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * misc.c (end_using_sockaddr_port): Only call ports_port_deref if + arg is valid. + (end_using_socket_port): Likewise. + + * linux/interrupt.h (mark_bh): Remove stubby inline, replace + with real declaration. + * pfinet.h (packet_queue_lock): New variable. + * sched.c (packet_queue_lock): Provide initialization. + * ethernet.c (ethernet_open): Put READPT in etherport_bucket + instead of pfinet_bucket. Fork ethernet_thread and + input_work_thread before returning. + (more_packets, etherport_bucket): New variables. + (input_work_thread, ethernet_thread, mark_bh): New functions. + + * main.c (pfinet_demuxer): Don't call ethernet_demuxer. + +Thu Feb 22 17:54:25 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * ethernet.c (ethernet_open): Request maximum queue limit + on our read port. + +Thu Feb 8 18:32:33 1996 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * socket-ops.c (S_socket_connect): Don't return EINVAL for + SS_CONNECTED state. + +Mon Jan 22 13:47:47 1996 Roland McGrath <roland@churchy.gnu.ai.mit.edu> + + * timer-emul.c (init_time): There are *still* a million microseconds to + the second. + +Thu Jan 18 12:37:57 1996 Michael I. 
Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * mapped-time.h (fetch_jiffies): Uh, duh, there are a million + microseconds in a second, not just a thousand. + +Tue Dec 26 19:29:16 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * io-ops.c (S_io_select): Add REPLY parameter, and request + notification if it dies. + * mutations.h (IO_SELECT_REPLY_PORT): New def. + +Thu Dec 14 18:50:07 1995 Michael I. Bushnell, p/BSG <mib@gnu.ai.mit.edu> + + * ethernet.c (ethernet_open): Don't set filter priority high. + +Mon Dec 11 13:23:49 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * socket-ops.c (S_socket_recv): Whoops, incorrectly negated this + particular return value. Rename the variable `recvd' to make + things more clear. + (S_socket_send): Ditto (`sent'), plus correctly check the return + value when deciding whether to deallocate ADDR's send right. + +Thu Dec 7 18:24:37 1995 Miles Bader <miles@gnu.ai.mit.edu> + + * socket-ops.c (S_socket_recv, S_socket_bind, S_socket_send): + Negate error return values from linux code. + (S_socket_setopt): Implement. + +Wed Aug 23 14:20:48 1995 Miles Bader <miles@churchy.gnu.ai.mit.edu> + + * Makefile (pfinet): Put all dependencies here. + (HURDLIBS): Removed. diff --git a/pfinet/Makefile b/pfinet/Makefile new file mode 100644 index 00000000..e86512a9 --- /dev/null +++ b/pfinet/Makefile @@ -0,0 +1,76 @@ +# +# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +# Written by Michael I. Bushnell. +# +# This file is part of the GNU Hurd. +# +# The GNU Hurd is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2, or (at +# your option) any later version. +# +# The GNU Hurd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + +dir := pfinet +makemode := server +LINUXSRCS= af_inet.c arp.c datagram.c dev.c dev_mcast.c devinet.c eth.c \ + icmp.c igmp.c ip.c \ + proc.c protocol.c raw.c route.c skbuff.c sock.c \ + tcp.c timer.c udp.c utils.c +UNUSEDSRC = packet.c ipx.c ip_fw.c p8022.c p8023.c pe2.c psnap.c rarp.c +SRCS = sched.c timer-emul.c devices.c socket.c main.c ethernet.c \ + io-ops.c socket-ops.c misc.c time.c options.c loopback.c +MIGSRCS = ioServer.c socketServer.c startup_notifyServer.c +OBJS= $(subst .c,.o,$(LINUXSRCS) $(SRCS) $(MIGSRCS)) +LCLHDRS= config.h mapped-time.h mutations.h pfinet.h +LINUXHDRS = arp.h datalink.h eth.h icmp.h ip.h ipx.h ipxcall.h p8022.h \ + p8022call.h protocol.h psnap.h psnapcall.h rarp.h raw.h route.h \ + snmp.h sock.h tcp.h udp.h +FROBBEDLINUXHEADERS = autoconf.h config.h errno.h etherdevice.h fcntl.h \ + icmp.h if.h if_arp.h if_ether.h igmp.h in.h inet.h interrupt.h \ + ip.h ip_fw.h ipx.h kernel.h major.h malloc.h mm.h net.h netdevice.h \ + notifier.h param.h route.h sched.h skbuff.h socket.h sockios.h stat.h \ + string.h tcp.h termios.h time.h timer.h types.h udp.h un.h wait.h +ASMHEADERS=bitops.h segment.h system.h + +HURDLIBS=trivfs fshelp threads ports ihash shouldbeinlibc iohelp + +target = pfinet + +include ../Makeconf + +vpath %.c $(srcdir)/linux-inet + +CPPFLAGS += -imacros $(srcdir)/config.h + +io-MIGSFLAGS = -imacros $(srcdir)/mutations.h +socket-MIGSFLAGS = -imacros $(srcdir)/mutations.h + +# cpp doesn't automatically make dependencies for -imacros dependencies. argh. 
+io_S.h ioServer.c socket_S.h socketServer.c: mutations.h
+$(OBJS): config.h
+
+lndist: lndist-linux-inet-files lndist-linux-files lndist-asm-files
+
+lndist-linux-inet-files: $(top_srcdir)/hurd-snap/$(dir)/linux-inet
+	ln $(addprefix $(srcdir)/linux-inet/,$(LINUXSRCS) $(UNUSEDSRC) $(LINUXHDRS)) $<
+
+lndist-linux-files: $(top_srcdir)/hurd-snap/$(dir)/linux
+	ln $(addprefix $(srcdir)/linux/,$(FROBBEDLINUXHEADERS)) $<
+
+lndist-asm-files: $(top_srcdir)/hurd-snap/$(dir)/asm
+	ln $(addprefix $(srcdir)/asm/,$(ASMHEADERS)) $<
+
+$(top_srcdir)/hurd-snap/$(dir)/linux-inet:
+	mkdir $@
+$(top_srcdir)/hurd-snap/$(dir)/linux:
+	mkdir $@
+$(top_srcdir)/hurd-snap/$(dir)/asm:
+	mkdir $@
diff --git a/pfinet/asm/bitops.h b/pfinet/asm/bitops.h
new file mode 100644
index 00000000..ee339bd6
--- /dev/null
+++ b/pfinet/asm/bitops.h
@@ -0,0 +1,135 @@
+#ifndef _I386_BITOPS_H
+#define _I386_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+/*
+ * Some hacks to defeat gcc over-optimizations..
+ */
+struct __dummy { unsigned long a[100]; };
+#define ADDR (*(struct __dummy *) addr)
+/* set_bit: btsl sets bit NR relative to ADDR; sbbl then turns the carry
+ * it leaves behind into the return value (the bit's previous state).
+ * NOTE(review): ADDR is an output-only ("=m") operand here and in
+ * clear_bit/change_bit although the instruction also reads it -- confirm
+ * the compiler in use treats that as intended. */
+extern __inline__ int set_bit(int nr, void * addr)
+{
+	int oldbit;
+
+	__asm__ __volatile__("btsl %2,%1\n\tsbbl %0,%0"
+		:"=r" (oldbit),"=m" (ADDR)
+		:"r" (nr));
+	return oldbit;
+}
+/* clear_bit: same shape as set_bit, using btrl to reset bit NR. */
+extern __inline__ int clear_bit(int nr, void * addr)
+{
+	int oldbit;
+
+	__asm__ __volatile__("btrl %2,%1\n\tsbbl %0,%0"
+		:"=r" (oldbit),"=m" (ADDR)
+		:"r" (nr));
+	return oldbit;
+}
+/* change_bit: same shape as set_bit, using btcl to complement bit NR. */
+extern __inline__ int change_bit(int nr, void * addr)
+{
+	int oldbit;
+
+	__asm__ __volatile__("btcl %2,%1\n\tsbbl %0,%0"
+		:"=r" (oldbit),"=m" (ADDR)
+		:"r" (nr));
+	return oldbit;
+}
+
+/*
+ * This routine doesn't need to be atomic, but it's faster to code it
+ * this way.  test_bit only reads: ADDR is an input ("m") operand here
+ * and btl leaves memory untouched.
+ */
+extern __inline__ int test_bit(int nr, void * addr)
+{
+	int oldbit;
+
+	__asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
+		:"=r" (oldbit)
+		:"m" (ADDR),"r" (nr));
+	return oldbit;
+}
+
+/*
+ * Find-bit routines..
+ *
+ * find_first_zero_bit returns 0 straight away when SIZE is 0; otherwise
+ * the asm compares (size + 31) >> 5 longs starting at ADDR against
+ * all-ones ("repe; scasl") and leaves its answer in RES (%edx).
+ * NOTE(review): "bx", "cx" and "di" are listed as clobbers and are also
+ * input registers; newer GCC releases are known to reject that overlap,
+ * and the multi-line string literal is an old GCC extension -- confirm
+ * before building with a current compiler.
+ */
+extern inline int find_first_zero_bit(void * addr, unsigned size)
+{
+	int res;
+
+	if (!size)
+		return 0;
+	__asm__("
+		cld
+		movl $-1,%%eax
+		repe; scasl
+		je 1f
+		subl $4,%%edi
+		movl (%%edi),%%eax
+		notl %%eax
+		bsfl %%eax,%%edx
+		jmp 2f
+1:		xorl %%edx,%%edx
+2:		subl %%ebx,%%edi
+		shll $3,%%edi
+		addl %%edi,%%edx"
+		:"=d" (res)
+		:"c" ((size + 31) >> 5), "D" (addr), "b" (addr)
+		:"ax", "bx", "cx", "di");
+	return res;
+}
+/* find_next_zero_bit: like find_first_zero_bit but starting at bit
+ * OFFSET.  A partial first long (OFFSET not a multiple of 32) is
+ * examined on its own; the rest is delegated to find_first_zero_bit. */
+extern inline int find_next_zero_bit (void * addr, int size, int offset)
+{
+	unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+	int set = 0, bit = offset & 31, res;
+
+	if (bit) {
+		/*
+		 * Look for zero in first byte
+		 */
+		__asm__("
+			bsfl %1,%0
+			jne 1f
+			movl $32, %0
+1:			"
+			: "=r" (set)
+			: "r" (~(*p >> bit)));
+		if (set < (32 - bit))
+			return set + offset;
+		set = 32 - bit;
+		p++;
+	}
+	/*
+	 * No zero yet, search remaining full bytes for a zero
+	 */
+	res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+	return (offset + set + res);
+}
+
+/*
+ * ffz = 
Find First Zero in word. Undefined if no zero exists,
+ * so code should check against ~0UL first..
+ */
+extern inline unsigned long ffz(unsigned long word)
+{
+	__asm__("bsfl %1,%0"
+		:"=r" (word)
+		:"r" (~word));
+	return word;
+}
+
+#endif /* _I386_BITOPS_H */
diff --git a/pfinet/asm/segment.h b/pfinet/asm/segment.h
new file mode 100644
index 00000000..34c35b96
--- /dev/null
+++ b/pfinet/asm/segment.h
@@ -0,0 +1,21 @@
+#ifndef _HACK_ASM_SEGMENT_H_
+#define _HACK_ASM_SEGMENT_H_
+/* Stand-ins for the Linux user-space accessors: every macro below is a plain dereference or memcpy. */
+#include <sys/types.h>
+/* Read a long through ADDR. */
+#define get_fs_long(addr) (*(long *)(addr))
+#define get_user_long(addr) (*(long *)(addr))
+/* Read a char through ADDR. */
+#define get_fs_byte(addr) (*(char *)(addr))
+#define get_user_byte(addr) (*(char *)(addr))
+/* Store X as a long through ADDR (put_user_long used to lack its closing parenthesis). */
+#define put_fs_long(x,addr) (*(long *)(addr) = (x))
+#define put_user_long(x,addr) (*(long *)(addr) = (x))
+/* Store X as a char through ADDR. */
+#define put_fs_byte(x,addr) (*(char *)(addr) = (x))
+#define put_user_byte(x,addr) (*(char *)(addr) = (x))
+/* Both copy directions are the same memcpy (A is the destination). */
+#define memcpy_fromfs(a,b,s) (memcpy (a, b, s))
+#define memcpy_tofs(a,b,s) (memcpy (a, b, s))
+
+#endif
diff --git a/pfinet/asm/system.h b/pfinet/asm/system.h
new file mode 100644
index 00000000..f828c3bb
--- /dev/null
+++ b/pfinet/asm/system.h
@@ -0,0 +1,10 @@
+#ifndef _HACK_ASM_SYSTEM_H_
+#define _HACK_ASM_SYSTEM_H_
+/* Interrupt-control stubs: intr_count is the constant 0, save_flags stores 0, the rest expand to nothing. */
+#define intr_count 0
+#define save_flags(x) ((x) = 0)
+#define restore_flags(x)
+#define cli()
+#define sti()
+
+#endif
diff --git a/pfinet/config.h b/pfinet/config.h
new file mode 100644
index 00000000..eb8314ef
--- /dev/null
+++ b/pfinet/config.h
@@ -0,0 +1,28 @@
+#define __KERNEL__
+#define _HURD_
+#define ENONET ENETUNREACH
+
+#define CONFIG_INET
+#define CONFIG_NET
+
+/* #undef CONFIG_INET_RARP */
+/* #undef CONFIG_IP_MULTICAST */
+/* #undef CONFIG_IP_FORWARD */
+/* #undef CONFIG_IP_FIREWALL */
+/* #undef CONFIG_IP_FIREWALL_DEBUG */
+/* #undef CONFIG_IP_FIREWALL_VERBOSE */
+/* #undef DEBUG_CONFIG_IP_FIREWALL */
+/* #undef CONFIG_IP_ACCT */
+
+/* #undef CONFIG_SKB_CHECK */
+
+/* #undef CONFIG_TCP_NAGLE_OFF */
+/* #undef CONFIG_AX25 */
+/* 
#undef CONFIG_IPX */ +/* #undef CONFIG_ATALK */ +/* #undef CONFIG_SLAVE_BALANCING */ + +/* #undef CONFIG_INET_PCTCP */ +/* #undef CONFIG_INET_SNARL */ + +/* #undef CONFIG_I_AM_A_BROKEN_BSD_WEENIE */ diff --git a/pfinet/devices.c b/pfinet/devices.c new file mode 100644 index 00000000..34d865a6 --- /dev/null +++ b/pfinet/devices.c @@ -0,0 +1,60 @@ +/* + Copyright (C) 1995, 1996, 1999 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/
+
+#include <errno.h>
+
+#include "pfinet.h"
+
+/* Head of the list of network interfaces; emptied by init_devices.  */
+struct device *dev_base;
+struct device loopback_dev;
+
+/* Master device port, fetched once by init_devices.  */
+device_t master_device;
+
+/* Broadcast by mark_bh, awaited (under global_lock) by
+   input_work_thread.  */
+static struct condition more_packets = CONDITION_INITIALIZER;
+
+/* Fetch the master device port into MASTER_DEVICE and start with an
+   empty interface list.  Prints a diagnostic and exits with status 1
+   if the port cannot be obtained.  */
+void
+init_devices (void)
+{
+  error_t err;
+
+  err = get_privileged_ports (0, &master_device);
+  if (err)
+    {
+      /* The failure is reported through the return value, so hand it to
+         perror via errno; otherwise the message would describe whatever
+         stale value errno happened to hold.  */
+      errno = err;
+      perror ("Cannot fetch master device port");
+      exit (1);
+    }
+
+  dev_base = 0;
+}
+
+/* Wake the input work thread.  ARG is ignored.  */
+void
+mark_bh (int arg)
+{
+  condition_broadcast (&more_packets);
+}
+
+/* Thread body: take global_lock, then forever wait on MORE_PACKETS and
+   run net_bh (0) after each wakeup.  Never returns; ARG is ignored.  */
+any_t
+input_work_thread (any_t arg)
+{
+  mutex_lock (&global_lock);
+  for (;;)
+    {
+      condition_wait (&more_packets, &global_lock);
+      net_bh (0);
+    }
+}
diff --git a/pfinet/ethernet.c b/pfinet/ethernet.c
new file mode 100644
index 00000000..85162fd8
--- /dev/null
+++ b/pfinet/ethernet.c
@@ -0,0 +1,229 @@
+/*
+   Copyright (C) 1995, 1996, 1998, 1999 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/
+
+#include <device/device.h>
+#include <device/net_status.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <error.h>
+
+#include "pfinet.h"
+
+/* Port of the opened ethernet device; ethernet_open treats
+   MACH_PORT_NULL as "not opened yet".  */
+device_t ether_port;
+
+/* Port class, port and receive-right name on which incoming packets
+   arrive; all three are set up by ethernet_open.  */
+struct port_class *etherreadclass;
+struct port_info *readpt;
+mach_port_t readptname;
+
+/* The one ethernet interface, filled in by setup_ethernet_device.  */
+struct device ether_dev;
+
+/* Statistics block returned by ethernet_get_stats; nothing in this file
+   updates it.  */
+struct enet_statistics retbuf;
+
+
+/* Mach doesn't provide this. DAMN. */
+struct enet_statistics *
+ethernet_get_stats (struct device *dev)
+{
+  return &retbuf;
+}
+
+/* `stop' hook of the interface: nothing to do, always succeeds.  */
+int
+ethernet_stop (struct device *dev)
+{
+  return 0;
+}
+
+/* `set_multicast_list' hook: only an empty list is supported.  */
+void
+ethernet_set_multi (struct device *dev, int numaddrs, void *addrs)
+{
+  assert (numaddrs == 0);
+}
+
+/* Filter program installed with device_set_filter (presumably it
+   accepts every packet -- confirm against the Mach net filter
+   documentation).  */
+static short ether_filter[] =
+{
+  NETF_PUSHLIT | NETF_NOP,
+  1
+};
+static int ether_filter_len = sizeof (ether_filter) / sizeof (short);
+
+/* Bucket holding READPT; served by ethernet_thread.  */
+static struct port_bucket *etherport_bucket;
+
+
+/* Thread body: serve ETHERPORT_BUCKET with ethernet_demuxer.  */
+any_t
+ethernet_thread (any_t arg)
+{
+  ports_manage_port_operations_one_thread (etherport_bucket,
+                                           ethernet_demuxer,
+                                           0);
+  return 0;
+}
+
+/* Demuxer for packets delivered by the device.  Returns 0 when INP is
+   not a NET_RCV_MSG_ID message and 1 once the message has been consumed
+   (queued for the stack, or dropped).  OUTP is not used.  */
+int
+ethernet_demuxer (mach_msg_header_t *inp,
+                  mach_msg_header_t *outp)
+{
+  struct net_rcv_msg *msg = (struct net_rcv_msg *) inp;
+  struct sk_buff *skb;
+  int datalen;
+
+  if (inp->msgh_id != NET_RCV_MSG_ID)
+    return 0;
+
+  /* A packet message for some port other than ours: release the reply
+     right, if any, and swallow the message.  */
+  if (inp->msgh_local_port != readptname)
+    {
+      if (inp->msgh_remote_port != MACH_PORT_NULL)
+        mach_port_deallocate (mach_task_self (), inp->msgh_remote_port);
+      return 1;
+    }
+
+  datalen = ETH_HLEN
+    + msg->packet_type.msgt_number - sizeof (struct packet_header);
+
+  /* A payload shorter than its own packet_header would make the second
+     copy below run with a negative length; drop such a message.  */
+  if (datalen < ETH_HLEN)
+    return 1;
+
+  mutex_lock (&global_lock);
+  skb = alloc_skb (datalen, GFP_ATOMIC);
+  if (!skb)
+    {
+      /* alloc_skb is assumed to return 0 when no buffer is available
+         (as in Linux); drop the frame rather than write through a null
+         pointer.  */
+      mutex_unlock (&global_lock);
+      return 1;
+    }
+  skb->len = datalen;
+  skb->dev = &ether_dev;
+
+  /* Copy the two parts of the frame into the buffer. */
+  bcopy (msg->header, skb->data, ETH_HLEN);
+  bcopy (msg->packet + sizeof (struct packet_header),
+         skb->data + ETH_HLEN,
+         datalen - ETH_HLEN);
+
+  /* Drop it on the queue. */
+  netif_rx (skb);
+  mutex_unlock (&global_lock);
+
+  return 1;
+}
+
+
+/* `open' hook of the interface.  Does nothing if the device port is
+   already open.  Otherwise creates the receive port in
+   ETHERPORT_BUCKET, opens DEV->name on the master device, installs the
+   packet filter and forks a detached ethernet_thread.  A failing
+   device_open or device_set_filter ends the program through
+   error (2, ...).  Always returns 0.  */
+int
+ethernet_open (struct device *dev)
+{
+  error_t err;
+
+  if (ether_port != MACH_PORT_NULL)
+    return 0;
+
+  etherreadclass = ports_create_class (0, 0);
+  errno = ports_create_port (etherreadclass, etherport_bucket,
+                             sizeof (struct port_info), &readpt);
+  assert_perror (errno);
+  readptname = ports_get_right (readpt);
+  mach_port_insert_right (mach_task_self (), readptname, readptname,
+                          MACH_MSG_TYPE_MAKE_SEND);
+
+  mach_port_set_qlimit (mach_task_self (), readptname, MACH_PORT_QLIMIT_MAX);
+
+  err = device_open (master_device, D_WRITE | D_READ, dev->name, &ether_port);
+  if (err)
+    error (2, err, "%s", dev->name);
+
+  err = device_set_filter (ether_port, ports_get_right (readpt),
+                           MACH_MSG_TYPE_MAKE_SEND, 0,
+                           ether_filter, ether_filter_len);
+  if (err)
+    error (2, err, "%s", dev->name);
+  cthread_detach (cthread_fork (ethernet_thread, 0));
+  return 0;
+}
+
+
+/* Transmit an ethernet frame */
+/* SKB is written to the device in one device_write call and then freed;
+   a failed or short write trips an assertion.  DEV is not consulted.  */
+int
+ethernet_xmit (struct sk_buff *skb, struct device *dev)
+{
+  u_int count;
+  int err;
+
+  err = device_write (ether_port, D_NOWAIT, 0, skb->data, skb->len, &count);
+  assert (err == 0);
+  assert (count == skb->len);
+  dev_kfree_skb (skb, FREE_WRITE);
+  return 0;
+}
+
+/* Initialise ETHER_DEV for the device called NAME, open it, read its
+   MTU and hardware address from the kernel and put it at the head of
+   the DEV_BASE list.  A failing device_get_status ends the program
+   through error (2, ...).  */
+void
+setup_ethernet_device (char *name)
+{
+  struct net_status netstat;
+  u_int count;
+  int net_address[2];
+  int i;
+  error_t err;
+
+  etherport_bucket = ports_create_bucket ();
+
+  /* Interface buffers. */
+  ether_dev.name = name;
+  for (i = 0; i < DEV_NUMBUFFS; i++)
+    skb_queue_head_init (&ether_dev.buffs[i]);
+
+  /* Functions */
+  ether_dev.open = ethernet_open;
+  ether_dev.stop = ethernet_stop;
+  ether_dev.hard_start_xmit = ethernet_xmit;
+  ether_dev.hard_header = eth_header;
+  ether_dev.rebuild_header = eth_rebuild_header;
+  ether_dev.type_trans = eth_type_trans;
+  ether_dev.get_stats = ethernet_get_stats;
+  ether_dev.set_multicast_list = ethernet_set_multi;
+
+  /* Some more fields */
+  ether_dev.type = ARPHRD_ETHER;
+  ether_dev.hard_header_len = sizeof (struct ethhdr);
+  ether_dev.addr_len = ETH_ALEN;
+  for (i = 0; i < 6; i++)
+    ether_dev.broadcast[i] = 0xff;
+  ether_dev.flags = IFF_BROADCAST | IFF_MULTICAST;
+  ether_dev.family = AF_INET; /* hmm. */
+  ether_dev.pa_addr = ether_dev.pa_brdaddr = ether_dev.pa_mask = 0;
+  ether_dev.pa_alen = sizeof (unsigned long);
+
+  ethernet_open (&ether_dev);
+
+  /* Fetch hardware information */
+  count = NET_STATUS_COUNT;
+  err = device_get_status (ether_port, NET_STATUS,
+                           (dev_status_t) &netstat, &count);
+  if (err)
+    error (2, err, "%s: Cannot get device status", name);
+  /* The MTU kept here excludes the hardware header.  */
+  ether_dev.mtu = netstat.max_packet_size - ether_dev.hard_header_len;
+  assert (netstat.header_format == HDR_ETHERNET);
+  assert (netstat.header_size == ETH_HLEN);
+  assert (netstat.address_size == ETH_ALEN);
+
+  count = 2;
+  assert (count * sizeof (int) >= ETH_ALEN);
+  err = device_get_status (ether_port, NET_ADDRESS, net_address, &count);
+  if (err)
+    error (2, err, "%s: Cannot get hardware Ethernet address", name);
+  /* Each word is passed through ntohl before the first ETH_ALEN bytes
+     are copied out.  */
+  net_address[0] = ntohl (net_address[0]);
+  net_address[1] = ntohl (net_address[1]);
+  bcopy (net_address, ether_dev.dev_addr, ETH_ALEN);
+
+  /* That should be enough. */
+
+  ether_dev.next = dev_base;
+  dev_base = &ether_dev;
+}
diff --git a/pfinet/io-ops.c b/pfinet/io-ops.c
new file mode 100644
index 00000000..b6831c31
--- /dev/null
+++ b/pfinet/io-ops.c
@@ -0,0 +1,640 @@
+/*
+   Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/
+
+#include "pfinet.h"
+#include "io_S.h"
+#include <netinet/in.h>
+#include <linux/wait.h>
+#include <linux-inet/sock.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <mach/notify.h>
+#include <sys/mman.h>
+
+/* Write DATALEN bytes from DATA to USER's socket, honouring the
+   socket's userflags.  On success stores the number of bytes accepted
+   in *AMOUNT and returns 0; the socket layer's negative error value is
+   returned negated.  OFFSET is ignored.  Returns EOPNOTSUPP when USER
+   is null.  */
+error_t
+S_io_write (struct sock_user *user,
+            char *data,
+            u_int datalen,
+            off_t offset,
+            mach_msg_type_number_t *amount)
+{
+  error_t err;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+  err = (*user->sock->ops->write) (user->sock, data, datalen,
+                                   user->sock->userflags);
+  mutex_unlock (&global_lock);
+
+  if (err < 0)
+    err = -err;
+  else
+    {
+      *amount = err;
+      err = 0;
+    }
+
+  return err;
+}
+
+/* Read up to AMOUNT bytes from USER's socket.  The buffer passed in
+   *DATA is used when it holds at least AMOUNT bytes (*DATALEN);
+   otherwise a fresh anonymous mapping is made and returned in *DATA.
+   On success *DATALEN is the number of bytes read and 0 is returned.
+   On failure *DATA is left as the caller passed it and the error is
+   returned: ENOMEM if the mapping cannot be made, otherwise the socket
+   layer's error negated.  OFFSET is ignored.  Returns EOPNOTSUPP when
+   USER is null.  */
+error_t
+S_io_read (struct sock_user *user,
+           char **data,
+           u_int *datalen,
+           off_t offset,
+           mach_msg_type_number_t amount)
+{
+  error_t err;
+  char *buf;
+  int alloced = 0;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  /* Instead of this, we should peek at the socket and only
+     allocate as much as necessary. */
+  buf = *data;
+  if (amount > *datalen)
+    {
+      buf = mmap (0, amount, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+      if (buf == MAP_FAILED)
+        return ENOMEM;
+      alloced = 1;
+    }
+
+  mutex_lock (&global_lock);
+  become_task (user);
+  err = (*user->sock->ops->read) (user->sock, buf, amount,
+                                  user->sock->userflags);
+  mutex_unlock (&global_lock);
+
+  if (err < 0)
+    {
+      /* Presumably no data goes back to the client on an error return,
+         so a mapping made above would simply leak; release it.  */
+      if (alloced)
+        munmap (buf, amount);
+      return -err;
+    }
+
+  *data = buf;
+  *datalen = err;
+  /* Give back the whole pages of our mapping that the data did not
+     reach.  */
+  if (alloced && round_page (*datalen) < round_page (amount))
+    munmap (*data + round_page (*datalen),
+            round_page (amount) - round_page (*datalen));
+  return 0;
+}
+
+/* Seeking is never possible: ESPIPE for a real USER, EOPNOTSUPP when
+   USER is null.  */
+error_t
+S_io_seek (struct sock_user *user,
+           off_t offset,
+           int whence,
+           off_t *newp)
+{
+  return user ? ESPIPE : EOPNOTSUPP;
+}
+
+/* Store in *AMOUNT how much data can be read from USER's socket right
+   now.  Stream and seqpacket sockets ask tcp_readable; datagram sockets
+   report the length of the first queued packet.  A socket in TCP_LISTEN
+   state yields EINVAL, any other socket type EOPNOTSUPP.  Returns
+   EOPNOTSUPP when USER is null.  */
+error_t
+S_io_readable (struct sock_user *user,
+               mach_msg_type_number_t *amount)
+{
+  struct sock *sk;
+  error_t err;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+
+  /* We need to avoid calling the Linux ioctl routines,
+     so here is a rather ugly break of modularity. */
+
+  sk = (struct sock *) user->sock->data;
+  err = 0;
+
+  /* Linux's af_inet.c ioctl routine just calls the protocol-specific
+     ioctl routine; it's those routines that we need to simulate. So
+     this switch corresponds to the initialization of SK->prot in
+     af_inet.c:inet_create. */
+  switch (user->sock->type)
+    {
+    case SOCK_STREAM:
+    case SOCK_SEQPACKET:
+      /* These guts are copied from tcp.c:tcp_ioctl. */
+      if (sk->state == TCP_LISTEN)
+        err = EINVAL;
+      else
+        {
+          sk->inuse = 1;
+          *amount = tcp_readable (sk);
+          release_sock (sk);
+        }
+      break;
+
+    case SOCK_DGRAM:
+      /* These guts are copied from udp.c:udp_ioctl (TIOCINQ). */
+      if (sk->state == TCP_LISTEN)
+        err = EINVAL;
+      else
+        /* Boy, I really love the C language. */
+        /* If skb_peek yields a null pointer (presumably: empty queue)
+           the GNU `?:' falls back to a zeroed temporary, so *AMOUNT
+           becomes 0.  */
+        *amount = (skb_peek (&sk->receive_queue)
+                   ? : &((struct sk_buff){}))->len;
+      break;
+
+    case SOCK_RAW:
+    default:
+      err = EOPNOTSUPP;
+      break;
+    }
+
+  mutex_unlock (&global_lock);
+  return err;
+}
+
+/* Make the socket's O_NONBLOCK flag equal to the one in BITS; every
+   other bit of BITS is ignored.  Returns EOPNOTSUPP when USER is
+   null.  */
+error_t
+S_io_set_all_openmodes (struct sock_user *user,
+                        int bits)
+{
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  if (bits & O_NONBLOCK)
+    user->sock->userflags |= O_NONBLOCK;
+  else
+    user->sock->userflags &= ~O_NONBLOCK;
+  mutex_unlock (&global_lock);
+  return 0;
+}
+
+/* Report the open modes in *BITS: O_WRITE unless sending has been shut
+   down, O_READ unless receiving has been shut down, and O_NONBLOCK if
+   it is set in the socket's userflags.  Returns EOPNOTSUPP when USER is
+   null.  */
+error_t
+S_io_get_openmodes (struct sock_user *user,
+                    int *bits)
+{
+  struct sock *sk;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  sk = user->sock->data;
+
+  *bits = 0;
+  if (!(sk->shutdown & SEND_SHUTDOWN))
+    *bits |= O_WRITE;
+  if (!(sk->shutdown & RCV_SHUTDOWN))
+    *bits |= O_READ;
+  if (user->sock->userflags & O_NONBLOCK)
+    *bits |= O_NONBLOCK;
+
+  mutex_unlock (&global_lock);
+  return 0;
+}
+
+/* Turn on O_NONBLOCK if BITS asks for it; every other bit of BITS is
+   ignored.  Returns EOPNOTSUPP when USER is null.  */
+error_t
+S_io_set_some_openmodes (struct sock_user *user,
+                         int bits)
+{
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  if (bits & O_NONBLOCK)
+    user->sock->userflags |= O_NONBLOCK;
+  mutex_unlock (&global_lock);
+  return 0;
+}
+
+error_t
+S_io_clear_some_openmodes (struct sock_user *user,
+                           int bits)
+{
+  if (!user) 
+ return EOPNOTSUPP; + + mutex_lock (&global_lock); + if (bits & O_NONBLOCK) + user->sock->userflags &= ~O_NONBLOCK; + mutex_unlock (&global_lock); + return 0; +} + +error_t +S_io_select (struct sock_user *user, + mach_port_t reply, mach_msg_type_name_t reply_type, + int *select_type) +{ + int avail = 0; + int cancel = 0; + int requested_notify = 0; + select_table table; + struct select_table_elt *elt, *nxt; + + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + become_task (user); + + /* In Linux, this means (supposedly) that I/O will never be possible. + That's a lose, so prevent it from happening. */ + assert (user->sock->ops->select); + + /* The select function returns one if the specified I/O type is + immediately possible. If it returns zero, then it is not + immediately possible, and it has called select_wait. Eventually + it will wakeup the wait queue specified in the select_wait call; + at that point we should retry the call. */ + + for (;;) + { + condition_init (&table.master_condition); + table.head = 0; + + if (*select_type & SELECT_READ) + avail |= ((*user->sock->ops->select) (user->sock, SEL_IN, &table) + ? SELECT_READ : 0); + if (*select_type & SELECT_WRITE) + avail |= ((*user->sock->ops->select) (user->sock, SEL_OUT, &table) + ? SELECT_WRITE : 0); + if (*select_type & SELECT_URG) + avail |= ((*user->sock->ops->select) (user->sock, SEL_EX, &table) + ? SELECT_URG : 0); + + if (!avail) + { + if (! requested_notify) + { + ports_interrupt_self_on_notification (user, reply, + MACH_NOTIFY_DEAD_NAME); + requested_notify = 1; + } + cancel = hurd_condition_wait (&table.master_condition, &global_lock); + } + + /* Drop the conditions implications and structures allocated in the + select table. 
*/ + for (elt = table.head; elt; elt = nxt) + { + condition_unimplies (elt->dependent_condition, + &table.master_condition); + nxt = elt->next; + free (elt); + } + + if (avail) + { + mutex_unlock (&global_lock); + *select_type = avail; + return 0; + } + + if (cancel) + { + mutex_unlock (&global_lock); + mach_port_deallocate (mach_task_self (), reply); + return EINTR; + } + } +} + +/* Establish that the condition in WAIT_ADDRESS should imply + the condition in P. Also, add us to the queue in P so + that the relation can be undone at the proper time. */ +void +select_wait (struct wait_queue **wait_address, select_table *p) +{ + struct select_table_elt *elt; + + /* tcp.c happens to use an uninitalized wait queue; + so this special hack is for that. */ + if (*wait_address == 0) + { + *wait_address = malloc (sizeof (struct wait_queue)); + condition_init (&(*wait_address)->c); + } + + elt = malloc (sizeof (struct select_table_elt)); + elt->dependent_condition = &(*wait_address)->c; + elt->next = p->head; + p->head = elt; + + condition_implies (elt->dependent_condition, &p->master_condition); +} + +error_t +S_io_stat (struct sock_user *user, + struct stat *st) +{ + if (!user) + return EOPNOTSUPP; + + bzero (st, sizeof (struct stat)); + + st->st_fstype = FSTYPE_SOCKET; + st->st_fsid = getpid (); + st->st_ino = (ino_t) user->sock; /* why not? */ + + st->st_blksize = 512; /* ???? 
*/ + return 0; +} + +error_t +S_io_reauthenticate (struct sock_user *user, + mach_port_t rend) +{ + struct sock_user *newuser; + uid_t gubuf[20], ggbuf[20], aubuf[20], agbuf[20]; + uid_t *gen_uids, *gen_gids, *aux_uids, *aux_gids; + u_int genuidlen, gengidlen, auxuidlen, auxgidlen; + error_t err; + int i; + auth_t auth; + mach_port_t newright; + + if (!user) + return EOPNOTSUPP; + + genuidlen = gengidlen = auxuidlen = auxgidlen = 20; + gen_uids = gubuf; + gen_gids = ggbuf; + aux_uids = aubuf; + aux_gids = agbuf; + + mutex_lock (&global_lock); + newuser = make_sock_user (user->sock, 0, 1); + + auth = getauth (); + newright = ports_get_right (newuser); + err = mach_port_insert_right (mach_task_self (), newright, newright, + MACH_MSG_TYPE_MAKE_SEND); + assert_perror (err); + do + err = auth_server_authenticate (auth, + rend, + MACH_MSG_TYPE_COPY_SEND, + newright, + MACH_MSG_TYPE_COPY_SEND, + &gen_uids, &genuidlen, + &aux_uids, &auxuidlen, + &gen_gids, &gengidlen, + &aux_gids, &auxgidlen); + while (err == EINTR); + mach_port_deallocate (mach_task_self (), rend); + mach_port_deallocate (mach_task_self (), newright); + mach_port_deallocate (mach_task_self (), auth); + + if (err) + newuser->isroot = 0; + else + for (i = 0; i < genuidlen; i++) + if (gen_uids[i] == 0) + newuser->isroot = 1; + + mach_port_move_member (mach_task_self (), newuser->pi.port_right, + pfinet_bucket->portset); + + mutex_unlock (&global_lock); + + ports_port_deref (newuser); + + if (gubuf != gen_uids) + munmap (gen_uids, genuidlen * sizeof (uid_t)); + if (ggbuf != gen_gids) + munmap (gen_gids, gengidlen * sizeof (uid_t)); + if (aubuf != aux_uids) + munmap (aux_uids, auxuidlen * sizeof (uid_t)); + if (agbuf != aux_gids) + munmap (aux_gids, auxgidlen * sizeof (uid_t)); + + return 0; +} + +error_t +S_io_restrict_auth (struct sock_user *user, + mach_port_t *newobject, + mach_msg_type_name_t *newobject_type, + uid_t *uids, + u_int uidslen, + uid_t *gids, + u_int gidslen) +{ + struct sock_user *newuser; + 
int i = 0; + int isroot; + + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + + isroot = 0; + if (user->isroot) + for (i = 0; i < uidslen && !isroot; i++) + if (uids[i] == 0) + isroot = 1; + + newuser = make_sock_user (user->sock, isroot, 0); + *newobject = ports_get_right (newuser); + *newobject_type = MACH_MSG_TYPE_MAKE_SEND; + ports_port_deref (newuser); + mutex_unlock (&global_lock); + return 0; +} + +error_t +S_io_duplicate (struct sock_user *user, + mach_port_t *newobject, + mach_msg_type_name_t *newobject_type) +{ + struct sock_user *newuser; + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + newuser = make_sock_user (user->sock, user->isroot, 0); + *newobject = ports_get_right (newuser); + *newobject_type = MACH_MSG_TYPE_MAKE_SEND; + ports_port_deref (newuser); + mutex_unlock (&global_lock); + return 0; +} + +error_t +S_io_identity (struct sock_user *user, + mach_port_t *id, + mach_msg_type_name_t *idtype, + mach_port_t *fsys, + mach_msg_type_name_t *fsystype, + int *fileno) +{ + error_t err; + + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + if (user->sock->identity == MACH_PORT_NULL) + { + err = mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, + &user->sock->identity); + if (err) + { + mutex_unlock (&global_lock); + return err; + } + } + + *id = user->sock->identity; + *idtype = MACH_MSG_TYPE_MAKE_SEND; + *fsys = fsys_identity; + *fsystype = MACH_MSG_TYPE_MAKE_SEND; + *fileno = (ino_t) user->sock; /* matches S_io_stat above */ + + mutex_unlock (&global_lock); + return 0; +} + +error_t +S_io_revoke (struct sock_user *user) +{ + /* XXX maybe we should try */ + return EOPNOTSUPP; +} + + + +error_t +S_io_async (struct sock_user *user, + mach_port_t notify, + mach_port_t *id, + mach_msg_type_name_t *idtype) +{ + return EOPNOTSUPP; +} + +error_t +S_io_mod_owner (struct sock_user *user, + pid_t owner) +{ + return EOPNOTSUPP; +} + +error_t +S_io_get_owner (struct sock_user *user, + pid_t *owner) +{ 
+ return EOPNOTSUPP; +} + +error_t +S_io_get_icky_async_id (struct sock_user *user, + mach_port_t *id, + mach_msg_type_name_t *idtype) +{ + return EOPNOTSUPP; +} + +error_t +S_io_server_version (struct sock_user *user, + char *name, + int *major, + int *minor, + int *edit) +{ + return EOPNOTSUPP; +} + +error_t +S_io_pathconf (struct sock_user *user, + int name, + int *value) +{ + return EOPNOTSUPP; +} + + + +error_t +S_io_map (struct sock_user *user, + mach_port_t *rdobj, + mach_msg_type_name_t *rdobj_type, + mach_port_t *wrobj, + mach_msg_type_name_t *wrobj_type) +{ + return EOPNOTSUPP; +} + +error_t +S_io_map_cntl (struct sock_user *user, + mach_port_t *obj, + mach_msg_type_name_t *obj_type) +{ + return EOPNOTSUPP; +} + +error_t +S_io_get_conch (struct sock_user *user) +{ + return EOPNOTSUPP; +} + +error_t +S_io_release_conch (struct sock_user *user) +{ + return EOPNOTSUPP; +} + +error_t +S_io_eofnotify (struct sock_user *user) +{ + return EOPNOTSUPP; +} + +error_t +S_io_prenotify (struct sock_user *user, + vm_offset_t start, + vm_offset_t end) +{ + return EOPNOTSUPP; +} + +error_t +S_io_postnotify (struct sock_user *user, + vm_offset_t start, + vm_offset_t end) +{ + return EOPNOTSUPP; +} + +error_t +S_io_readnotify (struct sock_user *user) +{ + return EOPNOTSUPP; +} + +error_t +S_io_readsleep (struct sock_user *user) +{ + return EOPNOTSUPP; +} + +error_t +S_io_sigio (struct sock_user *user) +{ + return EOPNOTSUPP; +} diff --git a/pfinet/linux-inet/af_inet.c b/pfinet/linux-inet/af_inet.c new file mode 100644 index 00000000..d20c8bfb --- /dev/null +++ b/pfinet/linux-inet/af_inet.c @@ -0,0 +1,1578 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * AF_INET protocol family socket handler. 
+ * + * Version: @(#)af_inet.c (from sock.c) 1.0.17 06/02/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Florian La Roche, <flla@stud.uni-sb.de> + * Alan Cox, <A.Cox@swansea.ac.uk> + * + * Changes (see also sock.c) + * + * A.N.Kuznetsov : Socket death error in accept(). + * John Richardson : Fix non blocking error in connect() + * so sockets that fail to connect + * don't return -EINPROGRESS. + * Alan Cox : Asynchronous I/O support + * Alan Cox : Keep correct socket pointer on sock structures + * when accept() ed + * Alan Cox : Semantics of SO_LINGER aren't state moved + * to close when you look carefully. With + * this fixed and the accept bug fixed + * some RPC stuff seems happier. + * Niibe Yutaka : 4.4BSD style write async I/O + * Alan Cox, + * Tony Gale : Fixed reuse semantics. + * Alan Cox : bind() shouldn't abort existing but dead + * sockets. Stops FTP netin:.. I hope. + * Alan Cox : bind() works correctly for RAW sockets. Note + * that FreeBSD at least is broken in this respect + * so be careful with compatibility tests... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include "arp.h" +#include "rarp.h" +#include "route.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "raw.h" +#include "icmp.h" + +#define min(a,b) ((a)<(b)?(a):(b)) + +extern struct proto packet_prot; + + +/* + * See if a socket number is in use. + */ + +static int sk_inuse(struct proto *prot, int num) +{ + struct sock *sk; + + for(sk = prot->sock_array[num & (SOCK_ARRAY_SIZE -1 )]; + sk != NULL; sk=sk->next) + { + if (sk->num == num) + return(1); + } + return(0); +} + + +/* + * Pick a new socket number + */ + +unsigned short get_new_socknum(struct proto *prot, unsigned short base) +{ + static int start=0; + + /* + * Used to cycle through the port numbers so the + * chances of a confused connection drop. + */ + + int i, j; + int best = 0; + int size = 32767; /* a big num. */ + struct sock *sk; + + if (base == 0) + base = PROT_SOCK+1+(start % 1024); + if (base <= PROT_SOCK) + { + base += PROT_SOCK+(start % 1024); + } + + /* Now look through the entire array and try to find an empty ptr. */ + for(i=0; i < SOCK_ARRAY_SIZE; i++) + { + j = 0; + sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)]; + while(sk != NULL) + { + sk = sk->next; + j++; + } + if (j == 0) + { + start =(i+1+start )%1024; + return(i+base+1); + } + if (j < size) + { + best = i; + size = j; + } + } + + /* Now make sure the one we want is not in use. 
*/ + + while(sk_inuse(prot, base +best+1)) + { + best += SOCK_ARRAY_SIZE; + } + return(best+base+1); +} + +/* + * Add a socket into the socket tables by number. + */ + +void put_sock(unsigned short num, struct sock *sk) +{ + struct sock *sk1; + struct sock *sk2; + int mask; + unsigned long flags; + + sk->num = num; + sk->next = NULL; + num = num &(SOCK_ARRAY_SIZE -1); + + /* We can't have an interrupt re-enter here. */ + save_flags(flags); + cli(); + + sk->prot->inuse += 1; + if (sk->prot->highestinuse < sk->prot->inuse) + sk->prot->highestinuse = sk->prot->inuse; + + if (sk->prot->sock_array[num] == NULL) + { + sk->prot->sock_array[num] = sk; + restore_flags(flags); + return; + } + restore_flags(flags); + for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) + { + if ((mask & sk->saddr) && + (mask & sk->saddr) != (mask & 0xffffffff)) + { + mask = mask << 8; + break; + } + } + cli(); + sk1 = sk->prot->sock_array[num]; + for(sk2 = sk1; sk2 != NULL; sk2=sk2->next) + { + if (!(sk2->saddr & mask)) + { + if (sk2 == sk1) + { + sk->next = sk->prot->sock_array[num]; + sk->prot->sock_array[num] = sk; + sti(); + return; + } + sk->next = sk2; + sk1->next= sk; + sti(); + return; + } + sk1 = sk2; + } + + /* Goes at the end. */ + sk->next = NULL; + sk1->next = sk; + sti(); +} + +/* + * Remove a socket from the socket tables. + */ + +static void remove_sock(struct sock *sk1) +{ + struct sock *sk2; + unsigned long flags; + + if (!sk1->prot) + { + printk("sock.c: remove_sock: sk1->prot == NULL\n"); + return; + } + + /* We can't have this changing out from under us. 
*/ + save_flags(flags); + cli(); + sk2 = sk1->prot->sock_array[sk1->num &(SOCK_ARRAY_SIZE -1)]; + if (sk2 == sk1) + { + sk1->prot->inuse -= 1; + sk1->prot->sock_array[sk1->num &(SOCK_ARRAY_SIZE -1)] = sk1->next; + restore_flags(flags); + return; + } + + while(sk2 && sk2->next != sk1) + { + sk2 = sk2->next; + } + + if (sk2) + { + sk1->prot->inuse -= 1; + sk2->next = sk1->next; + restore_flags(flags); + return; + } + restore_flags(flags); +} + +/* + * Destroy an AF_INET socket + */ + +void destroy_sock(struct sock *sk) +{ + struct sk_buff *skb; + + sk->inuse = 1; /* just to be safe. */ + + /* In case it's sleeping somewhere. */ + if (!sk->dead) + sk->write_space(sk); + + remove_sock(sk); + + /* Now we can no longer get new packets. */ + delete_timer(sk); + /* Nor send them */ + del_timer(&sk->retransmit_timer); + + while ((skb = tcp_dequeue_partial(sk)) != NULL) { + IS_SKB(skb); + kfree_skb(skb, FREE_WRITE); + } + + /* Cleanup up the write buffer. */ + while((skb = skb_dequeue(&sk->write_queue)) != NULL) { + IS_SKB(skb); + kfree_skb(skb, FREE_WRITE); + } + + /* + * Don't discard received data until the user side kills its + * half of the socket. + */ + + if (sk->dead) + { + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) + { + /* + * This will take care of closing sockets that were + * listening and didn't accept everything. + */ + if (skb->sk != NULL && skb->sk != sk) + { + IS_SKB(skb); + skb->sk->dead = 1; + skb->sk->prot->close(skb->sk, 0); + } + IS_SKB(skb); + kfree_skb(skb, FREE_READ); + } + } + + /* Now we need to clean up the send head. */ + cli(); + for(skb = sk->send_head; skb != NULL; ) + { + struct sk_buff *skb2; + + /* + * We need to remove skb from the transmit queue, + * or maybe the arp queue. 
+ */ + if (skb->next && skb->prev) { +/* printk("destroy_sock: unlinked skb\n");*/ + IS_SKB(skb); + skb_unlink(skb); + } + skb->dev = NULL; + skb2 = skb->link3; + kfree_skb(skb, FREE_WRITE); + skb = skb2; + } + sk->send_head = NULL; + sti(); + + /* And now the backlog. */ + while((skb=skb_dequeue(&sk->back_log))!=NULL) + { + /* this should never happen. */ +/* printk("cleaning back_log\n");*/ + kfree_skb(skb, FREE_READ); + } + + /* Now if it has a half accepted/ closed socket. */ + if (sk->pair) + { + sk->pair->dead = 1; + sk->pair->prot->close(sk->pair, 0); + sk->pair = NULL; + } + + /* + * Now if everything is gone we can free the socket + * structure, otherwise we need to keep it around until + * everything is gone. + */ + + if (sk->dead && sk->rmem_alloc == 0 && sk->wmem_alloc == 0) + { + kfree_s((void *)sk,sizeof(*sk)); + } + else + { + /* this should never happen. */ + /* actually it can if an ack has just been sent. */ + sk->destroy = 1; + sk->ack_backlog = 0; + sk->inuse = 0; + reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); + } +} + +/* + * The routines beyond this point handle the behaviour of an AF_INET + * socket object. Mostly it punts to the subprotocols of IP to do + * the work. + */ + +static int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk; + + sk = (struct sock *) sock->data; + + switch(cmd) + { + case F_SETOWN: + /* + * This is a little restrictive, but it's the only + * way to make sure that you can't send a sigurg to + * another process. + */ + if (!suser() && current->pgrp != -arg && + current->pid != arg) return(-EPERM); + sk->proc = arg; + return(0); + case F_GETOWN: + return(sk->proc); + default: + return(-EINVAL); + } +} + +/* + * Set socket options on an inet socket. 
+ */ + +static int inet_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + struct sock *sk = (struct sock *) sock->data; + if (level == SOL_SOCKET) + return sock_setsockopt(sk,level,optname,optval,optlen); + if (sk->prot->setsockopt==NULL) + return(-EOPNOTSUPP); + else + return sk->prot->setsockopt(sk,level,optname,optval,optlen); +} + +/* + * Get a socket option on an AF_INET socket. + */ + +static int inet_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + struct sock *sk = (struct sock *) sock->data; + if (level == SOL_SOCKET) + return sock_getsockopt(sk,level,optname,optval,optlen); + if(sk->prot->getsockopt==NULL) + return(-EOPNOTSUPP); + else + return sk->prot->getsockopt(sk,level,optname,optval,optlen); +} + +/* + * Automatically bind an unbound socket. + */ + +static int inet_autobind(struct sock *sk) +{ + /* We may need to bind the socket. */ + if (sk->num == 0) + { + sk->num = get_new_socknum(sk->prot, 0); + if (sk->num == 0) + return(-EAGAIN); + put_sock(sk->num, sk); + sk->dummy_th.source = ntohs(sk->num); + } + return 0; +} + +/* + * Move a socket into listening state. + */ + +static int inet_listen(struct socket *sock, int backlog) +{ + struct sock *sk = (struct sock *) sock->data; + + if(inet_autobind(sk)!=0) + return -EAGAIN; + + /* We might as well re use these. */ + /* + * note that the backlog is "unsigned char", so truncate it + * somewhere. We might as well truncate it to what everybody + * else does.. + */ + if (backlog > 5) + backlog = 5; + sk->max_ack_backlog = backlog; + if (sk->state != TCP_LISTEN) + { + sk->ack_backlog = 0; + sk->state = TCP_LISTEN; + } + return(0); +} + +/* + * Default callbacks for user INET sockets. These just wake up + * the user owning the socket. 
+ */ + +static void def_callback1(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk,int len) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 1); + } +} + +static void def_callback3(struct sock *sk) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 2); + } +} + +/* + * Create an inet socket. + * + * FIXME: Gcc would generate much better code if we set the parameters + * up in in-memory structure order. Gcc68K even more so + */ + +static int inet_create(struct socket *sock, int protocol) +{ + struct sock *sk; + struct proto *prot; + int err; + + sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL); + if (sk == NULL) + return(-ENOBUFS); + sk->num = 0; + sk->reuse = 0; + switch(sock->type) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + if (protocol && protocol != IPPROTO_TCP) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + protocol = IPPROTO_TCP; + sk->no_check = TCP_NO_CHECK; + prot = &tcp_prot; + break; + + case SOCK_DGRAM: + if (protocol && protocol != IPPROTO_UDP) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + protocol = IPPROTO_UDP; + sk->no_check = UDP_NO_CHECK; + prot=&udp_prot; + break; + + case SOCK_RAW: + if (!suser()) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPERM); + } + if (!protocol) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + prot = &raw_prot; + sk->reuse = 1; + sk->no_check = 0; /* + * Doesn't matter no checksum is + * performed anyway. + */ + sk->num = protocol; + break; + +#ifndef _HURD_ + case SOCK_PACKET: + if (!suser()) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPERM); + } + if (!protocol) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + prot = &packet_prot; + sk->reuse = 1; + sk->no_check = 0; /* Doesn't matter no checksum is + * performed anyway. 
+ */ + sk->num = protocol; + break; +#endif + + default: + kfree_s((void *)sk, sizeof(*sk)); + return(-ESOCKTNOSUPPORT); + } + sk->socket = sock; +#ifdef CONFIG_TCP_NAGLE_OFF + sk->nonagle = 1; +#else + sk->nonagle = 0; +#endif + sk->type = sock->type; + sk->stamp.tv_sec=0; + sk->protocol = protocol; + sk->wmem_alloc = 0; + sk->rmem_alloc = 0; + sk->sndbuf = SK_WMEM_MAX; + sk->rcvbuf = SK_RMEM_MAX; + sk->pair = NULL; + sk->opt = NULL; + sk->write_seq = 0; + sk->acked_seq = 0; + sk->copied_seq = 0; + sk->fin_seq = 0; + sk->urg_seq = 0; + sk->urg_data = 0; + sk->proc = 0; + sk->rtt = 0; /*TCP_WRITE_TIME << 3;*/ + sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + sk->mdev = 0; + sk->backoff = 0; + sk->packets_out = 0; + sk->cong_window = 1; /* start with only sending one packet at a time. */ + sk->cong_count = 0; + sk->ssthresh = 0; + sk->max_window = 0; + sk->urginline = 0; + sk->intr = 0; + sk->linger = 0; + sk->destroy = 0; + sk->priority = 1; + sk->shutdown = 0; + sk->keepopen = 0; + sk->zapped = 0; + sk->done = 0; + sk->ack_backlog = 0; + sk->window = 0; + sk->bytes_rcv = 0; + sk->state = TCP_CLOSE; + sk->dead = 0; + sk->ack_timed = 0; + sk->partial = NULL; + sk->user_mss = 0; + sk->debug = 0; + + /* this is how many unacked bytes we will accept for this socket. */ + sk->max_unacked = 2048; /* needs to be at most 2 full packets. */ + + /* how many packets we should send before forcing an ack. 
+ if this is set to zero it is the same as sk->delay_acks = 0 */ + sk->max_ack_backlog = 0; + sk->inuse = 0; + sk->delay_acks = 0; + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->receive_queue); + sk->mtu = 576; + sk->prot = prot; + sk->sleep = sock->wait; + sk->daddr = 0; + sk->saddr = 0 /* ip_my_addr() */; + sk->err = 0; + sk->next = NULL; + sk->pair = NULL; + sk->send_tail = NULL; + sk->send_head = NULL; + sk->timeout = 0; + sk->broadcast = 0; + sk->localroute = 0; + init_timer(&sk->timer); + init_timer(&sk->retransmit_timer); + sk->timer.data = (unsigned long)sk; + sk->timer.function = &net_timer; + skb_queue_head_init(&sk->back_log); + sk->blog = 0; + sock->data =(void *) sk; + sk->dummy_th.doff = sizeof(sk->dummy_th)/4; + sk->dummy_th.res1=0; + sk->dummy_th.res2=0; + sk->dummy_th.urg_ptr = 0; + sk->dummy_th.fin = 0; + sk->dummy_th.syn = 0; + sk->dummy_th.rst = 0; + sk->dummy_th.psh = 0; + sk->dummy_th.ack = 0; + sk->dummy_th.urg = 0; + sk->dummy_th.dest = 0; + sk->ip_tos=0; + sk->ip_ttl=64; +#ifdef CONFIG_IP_MULTICAST + sk->ip_mc_loop=1; + sk->ip_mc_ttl=1; + *sk->ip_mc_name=0; + sk->ip_mc_list=NULL; +#endif + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback3; + sk->error_report = def_callback1; + + if (sk->num) + { + /* + * It assumes that any protocol which allows + * the user to assign a number at socket + * creation time automatically + * shares. + */ + put_sock(sk->num, sk); + sk->dummy_th.source = ntohs(sk->num); + } + + if (sk->prot->init) + { + err = sk->prot->init(sk); + if (err != 0) + { + destroy_sock(sk); + return(err); + } + } + return(0); +} + + +/* + * Duplicate a socket. + */ + +static int inet_dup(struct socket *newsock, struct socket *oldsock) +{ + return(inet_create(newsock,((struct sock *)(oldsock->data))->protocol)); +} + +/* + * Return 1 if we still have things to send in our buffers. 
+ */ +static inline int closing(struct sock * sk) +{ + switch (sk->state) { + case TCP_FIN_WAIT1: + case TCP_CLOSING: + case TCP_LAST_ACK: + return 1; + } + return 0; +} + + +/* + * The peer socket should always be NULL (or else). When we call this + * function we are destroying the object and from then on nobody + * should refer to it. + */ + +static int inet_release(struct socket *sock, struct socket *peer) +{ + struct sock *sk = (struct sock *) sock->data; + if (sk == NULL) + return(0); + + sk->state_change(sk); + + /* Start closing the connection. This may take a while. */ + +#ifdef CONFIG_IP_MULTICAST + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); +#endif + /* + * If linger is set, we don't return until the close + * is complete. Other wise we return immediately. The + * actually closing is done the same either way. + * + * If the close is due to the process exiting, we never + * linger.. + */ + + if (sk->linger == 0 || (current->flags & PF_EXITING)) + { + sk->prot->close(sk,0); + sk->dead = 1; + } + else + { + sk->prot->close(sk, 0); + cli(); + if (sk->lingertime) + current->timeout = jiffies + HZ*sk->lingertime; + while(closing(sk) && current->timeout>0) + { + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + break; +#if 0 + /* not working now - closes can't be restarted */ + sti(); + current->timeout=0; + return(-ERESTARTSYS); +#endif + } + } + current->timeout=0; + sti(); + sk->dead = 1; + } + sk->inuse = 1; + + /* This will destroy it. */ + release_sock(sk); + sock->data = NULL; + sk->socket = NULL; + return(0); +} + + +/* this needs to be changed to disallow + the rebinding of sockets. What error + should it return? */ + +static int inet_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; + struct sock *sk=(struct sock *)sock->data, *sk2; + unsigned short snum = 0 /* Stoopid compiler.. 
this IS ok */; + int chk_addr_ret; + + /* check this error. */ + if (sk->state != TCP_CLOSE) + return(-EIO); + if(addr_len<sizeof(struct sockaddr_in)) + return -EINVAL; + + if(sock->type != SOCK_RAW) + { + if (sk->num != 0) + return(-EINVAL); + + snum = ntohs(addr->sin_port); + + /* + * We can't just leave the socket bound wherever it is, it might + * be bound to a privileged port. However, since there seems to + * be a bug here, we will leave it if the port is not privileged. + */ + if (snum == 0) + { + snum = get_new_socknum(sk->prot, 0); + } + if (snum < PROT_SOCK && !suser()) + return(-EACCES); + } + + chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST) + return(-EADDRNOTAVAIL); /* Source address MUST be ours! */ + + if (chk_addr_ret || addr->sin_addr.s_addr == 0) + sk->saddr = addr->sin_addr.s_addr; + + if(sock->type != SOCK_RAW) + { + /* Make sure we are allowed to bind here. */ + cli(); + for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)]; + sk2 != NULL; sk2 = sk2->next) + { + /* should be below! */ + if (sk2->num != snum) + continue; + if (!sk->reuse) + { + sti(); + return(-EADDRINUSE); + } + + if (sk2->num != snum) + continue; /* more than one */ + if (sk2->saddr != sk->saddr) + continue; /* socket per slot ! -FB */ + if (!sk2->reuse || sk2->state==TCP_LISTEN) + { + sti(); + return(-EADDRINUSE); + } + } + sti(); + + remove_sock(sk); + put_sock(snum, sk); + sk->dummy_th.source = ntohs(sk->num); + sk->daddr = 0; + sk->dummy_th.dest = 0; + } + return(0); +} + +/* + * Handle sk->err properly. The cli/sti matter. + */ + +static int inet_error(struct sock *sk) +{ + unsigned long flags; + int err; + save_flags(flags); + cli(); + err=sk->err; + sk->err=0; + restore_flags(flags); + return -err; +} + +/* + * Connect to a remote host. There is regrettably still a little + * TCP 'magic' in here. 
+ */ + +static int inet_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) +{ + struct sock *sk=(struct sock *)sock->data; + int err; + sock->conn = NULL; + + if (sock->state == SS_CONNECTING && tcp_connected(sk->state)) + { + sock->state = SS_CONNECTED; + /* Connection completing after a connect/EINPROGRESS/select/connect */ + return 0; /* Rock and roll */ + } + + if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) + return -EALREADY; /* Connecting is currently in progress */ + + if (sock->state != SS_CONNECTING) + { + /* We may need to bind the socket. */ + if(inet_autobind(sk)!=0) + return(-EAGAIN); + if (sk->prot->connect == NULL) + return(-EOPNOTSUPP); + err = sk->prot->connect(sk, (struct sockaddr_in *)uaddr, addr_len); + if (err < 0) + return(err); + sock->state = SS_CONNECTING; + } + + if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING) + { + sock->state=SS_UNCONNECTED; + cli(); + err=sk->err; + sk->err=0; + sti(); + return -err; + } + + if (sk->state != TCP_ESTABLISHED &&(flags & O_NONBLOCK)) + return(-EINPROGRESS); + + cli(); /* avoid the race condition */ + while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + { + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + return(-ERESTARTSYS); + } + /* This fixes a nasty in the tcp/ip code. There is a hideous hassle with + icmp error packets wanting to close a tcp or udp socket. */ + if(sk->err && sk->protocol == IPPROTO_TCP) + { + sti(); + sock->state = SS_UNCONNECTED; + err = -sk->err; + sk->err=0; + return err; /* set by tcp_err() */ + } + } + sti(); + sock->state = SS_CONNECTED; + + if (sk->state != TCP_ESTABLISHED && sk->err) + { + sock->state = SS_UNCONNECTED; + err=sk->err; + sk->err=0; + return(-err); + } + return(0); +} + + +static int inet_socketpair(struct socket *sock1, struct socket *sock2) +{ + return(-EOPNOTSUPP); +} + + +/* + * Accept a pending connection. 
The TCP layer now gives BSD semantics.
+ */
+
+/*
+ *	Parameters:
+ *	  sock    - the listening socket (sock->data is its struct sock).
+ *	  newsock - socket that receives the accepted connection.  Any sock
+ *	            already attached to it is marked dead and destroyed.
+ *	  flags   - handed to the protocol's accept routine; O_NONBLOCK also
+ *	            skips the wait for the handshake below.
+ *
+ *	Returns 0 on success or a negative errno (-EOPNOTSUPP, -ERESTARTSYS,
+ *	or the negated pending error of the listening/new sock).
+ *
+ *	When the wait is interrupted by a signal the half-accepted sock is
+ *	parked in sk1->pair and picked up again by the next call.
+ *
+ *	NOTE: with O_NONBLOCK set the function returns 0 straight after
+ *	attaching the new sock, without setting newsock->state.
+ *
+ *	Pending errors are fetched and cleared through inet_error() so the
+ *	read-and-clear is done with interrupts disabled.
+ */
+
+static int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk1, *sk2;
+	int err;
+
+	sk1 = (struct sock *) sock->data;
+
+	/*
+	 * We've been passed an extra socket.
+	 * We need to free it up because the tcp module creates
+	 * its own when it accepts one.
+	 */
+	if (newsock->data)
+	{
+		struct sock *sk=(struct sock *)newsock->data;
+		newsock->data=NULL;
+		sk->dead = 1;
+		destroy_sock(sk);
+	}
+
+	if (sk1->prot->accept == NULL)
+		return(-EOPNOTSUPP);
+
+	/* Restore the state if we have been interrupted, and then returned. */
+	if (sk1->pair != NULL )
+	{
+		sk2 = sk1->pair;
+		sk1->pair = NULL;
+	}
+	else
+	{
+		sk2 = sk1->prot->accept(sk1,flags);
+		if (sk2 == NULL)
+		{
+			if (sk1->err <= 0)
+				printk("Warning sock.c:sk1->err <= 0. Returning non-error.\n");
+			/* Negated sk1->err, cleared atomically. */
+			return inet_error(sk1);
+		}
+	}
+	newsock->data = (void *)sk2;
+	sk2->sleep = newsock->wait;
+	sk2->socket = newsock;
+	newsock->conn = NULL;
+	if (flags & O_NONBLOCK)
+		return(0);
+
+	cli(); /* avoid the race. */
+	while(sk2->state == TCP_SYN_RECV)
+	{
+		interruptible_sleep_on(sk2->sleep);
+		if (current->signal & ~current->blocked)
+		{
+			sti();
+			/* Park the sock so that a restarted accept finds it. */
+			sk1->pair = sk2;
+			sk2->sleep = NULL;
+			sk2->socket=NULL;
+			newsock->data = NULL;
+			return(-ERESTARTSYS);
+		}
+	}
+	sti();
+
+	if (sk2->state != TCP_ESTABLISHED && sk2->err > 0)
+	{
+		/* Fetch the error before the sock is destroyed. */
+		err = inet_error(sk2);
+		sk2->dead=1;	/* ANK */
+		destroy_sock(sk2);
+		newsock->data = NULL;
+		return(err);
+	}
+	newsock->state = SS_CONNECTED;
+	return(0);
+}
+
+
+/*
+ *	This does both peername and sockname. 
+ */ + +static int inet_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_in *sin=(struct sockaddr_in *)uaddr; + struct sock *sk; + + sin->sin_family = AF_INET; + sk = (struct sock *) sock->data; + if (peer) + { + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sin->sin_port = sk->dummy_th.dest; + sin->sin_addr.s_addr = sk->daddr; + } + else + { + sin->sin_port = sk->dummy_th.source; + if (sk->saddr == 0) + sin->sin_addr.s_addr = ip_my_addr(); + else + sin->sin_addr.s_addr = sk->saddr; + } + *uaddr_len = sizeof(*sin); + return(0); +} + + +/* + * The assorted BSD I/O operations + */ + +static int inet_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags, struct sockaddr *sin, int *addr_len ) +{ + struct sock *sk = (struct sock *) sock->data; + + if (sk->prot->recvfrom == NULL) + return(-EOPNOTSUPP); + if(sk->err) + return inet_error(sk); + /* We may need to bind the socket. */ + if(inet_autobind(sk)!=0) + return(-EAGAIN); + return(sk->prot->recvfrom(sk, (unsigned char *) ubuf, size, noblock, flags, + (struct sockaddr_in*)sin, addr_len)); +} + + +static int inet_recv(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags) +{ + /* BSD explicitly states these are the same - so we do it this way to be sure */ + return inet_recvfrom(sock,ubuf,size,noblock,flags,NULL,NULL); +} + +static int inet_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + struct sock *sk = (struct sock *) sock->data; + + if(sk->err) + return inet_error(sk); + /* We may need to bind the socket. 
*/ + if(inet_autobind(sk)) + return(-EAGAIN); + return(sk->prot->read(sk, (unsigned char *) ubuf, size, noblock, 0)); +} + +static int inet_send(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags) +{ + struct sock *sk = (struct sock *) sock->data; + if (sk->shutdown & SEND_SHUTDOWN) + { + send_sig(SIGPIPE, current, 1); + return(-EPIPE); + } + if(sk->err) + return inet_error(sk); + /* We may need to bind the socket. */ + if(inet_autobind(sk)!=0) + return(-EAGAIN); + return(sk->prot->write(sk, (unsigned char *) ubuf, size, noblock, flags)); +} + +static int inet_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return inet_send(sock,ubuf,size,noblock,0); +} + +static int inet_sendto(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags, struct sockaddr *sin, int addr_len) +{ + struct sock *sk = (struct sock *) sock->data; + if (sk->shutdown & SEND_SHUTDOWN) + { + send_sig(SIGPIPE, current, 1); + return(-EPIPE); + } + if (sk->prot->sendto == NULL) + return(-EOPNOTSUPP); + if(sk->err) + return inet_error(sk); + /* We may need to bind the socket. */ + if(inet_autobind(sk)!=0) + return -EAGAIN; + return(sk->prot->sendto(sk, (unsigned char *) ubuf, size, noblock, flags, + (struct sockaddr_in *)sin, addr_len)); +} + + +static int inet_shutdown(struct socket *sock, int how) +{ + struct sock *sk=(struct sock*)sock->data; + + /* + * This should really check to make sure + * the socket is a TCP socket. (WHY AC...) + */ + how++; /* maps 0->1 has the advantage of making bit 1 rcvs and + 1->2 bit 2 snds. 
+ 2->3 */ + if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */ + return(-EINVAL); + if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED) + sock->state = SS_CONNECTED; + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sk->shutdown |= how; + if (sk->prot->shutdown) + sk->prot->shutdown(sk, how); + return(0); +} + + +static int inet_select(struct socket *sock, int sel_type, select_table *wait ) +{ + struct sock *sk=(struct sock *) sock->data; + if (sk->prot->select == NULL) + { + return(0); + } + return(sk->prot->select(sk, sel_type, wait)); +} + +#ifndef _HURD_ + +/* + * ioctl() calls you can issue on an INET socket. Most of these are + * device configuration and stuff and very rarely used. Some ioctls + * pass on to the socket itself. + * + * NOTE: I like the idea of a module for the config stuff. ie ifconfig + * loads the devconfigure module does its configuring and unloads it. + * There's a good 20K of config code hanging around the kernel. + */ + +static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk=(struct sock *)sock->data; + int err; + + switch(cmd) + { + case FIOSETOWN: + case SIOCSPGRP: + err=verify_area(VERIFY_READ,(int *)arg,sizeof(long)); + if(err) + return err; + sk->proc = get_fs_long((int *) arg); + return(0); + case FIOGETOWN: + case SIOCGPGRP: + err=verify_area(VERIFY_WRITE,(void *) arg, sizeof(long)); + if(err) + return err; + put_fs_long(sk->proc,(int *)arg); + return(0); + case SIOCGSTAMP: + if(sk->stamp.tv_sec==0) + return -ENOENT; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); + if(err) + return err; + memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); + return 0; + case SIOCADDRT: case SIOCADDRTOLD: + case SIOCDELRT: case SIOCDELRTOLD: + return(ip_rt_ioctl(cmd,(void *) arg)); + case SIOCDARP: + case SIOCGARP: + case SIOCSARP: + return(arp_ioctl(cmd,(void *) arg)); +#ifdef CONFIG_INET_RARP + case SIOCDRARP: + case SIOCGRARP: + case SIOCSRARP: + 
return(rarp_ioctl(cmd,(void *) arg)); +#endif + case SIOCGIFCONF: + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCGIFADDR: + case SIOCSIFADDR: + +/* begin multicast support change */ + case SIOCADDMULTI: + case SIOCDELMULTI: +/* end multicast support change */ + + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCSIFLINK: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case OLD_SIOCGIFHWADDR: + case SIOCSIFMAP: + case SIOCGIFMAP: + case SIOCSIFSLAVE: + case SIOCGIFSLAVE: + return(dev_ioctl(cmd,(void *) arg)); + + default: + if ((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) + return(dev_ioctl(cmd,(void *) arg)); + + if (sk->prot->ioctl==NULL) + return(-EINVAL); + return(sk->prot->ioctl(sk, cmd, arg)); + } + /*NOTREACHED*/ + return(0); + +} +#else /* _HURD_ */ +static int inet_ioctl (struct socket *sock, + unsigned int cm, + unsigned long arg) +{ + return EOPNOTSUPP; +} +#endif + + +/* + * This routine must find a socket given a TCP or UDP header. + * Everything is assumed to be in net order. + * + * We give priority to more closely bound ports: if some socket + * is bound to a particular foreign address, it will get the packet + * rather than somebody listening to any address.. + */ + +struct sock *get_sock(struct proto *prot, unsigned short num, + unsigned long raddr, + unsigned short rnum, unsigned long laddr) +{ + struct sock *s; + struct sock *result = NULL; + int badness = -1; + unsigned short hnum; + + hnum = ntohs(num); + + /* + * SOCK_ARRAY_SIZE must be a power of two. This will work better + * than a prime unless 3 or more sockets end up using the same + * array entry. 
This should not be a problem because most + * well known sockets don't overlap that much, and for + * the other ones, we can just be careful about picking our + * socket number when we choose an arbitrary one. + */ + + for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)]; + s != NULL; s = s->next) + { + int score = 0; + + if (s->num != hnum) + continue; + + if(s->dead && (s->state == TCP_CLOSE)) + continue; + /* local address matches? */ + if (s->saddr) { + if (s->saddr != laddr) + continue; + score++; + } + /* remote address matches? */ + if (s->daddr) { + if (s->daddr != raddr) + continue; + score++; + } + /* remote port matches? */ + if (s->dummy_th.dest) { + if (s->dummy_th.dest != rnum) + continue; + score++; + } + /* perfect match? */ + if (score == 3) + return s; + /* no, check if this is the best so far.. */ + if (score <= badness) + continue; + result = s; + badness = score; + } + return result; +} + +/* + * Deliver a datagram to raw sockets. + */ + +struct sock *get_sock_raw(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned long laddr) +{ + struct sock *s; + + s=sk; + + for(; s != NULL; s = s->next) + { + if (s->num != num) + continue; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->daddr && s->daddr!=raddr) + continue; + if(s->saddr && s->saddr!=laddr) + continue; + return(s); + } + return(NULL); +} + +#ifdef CONFIG_IP_MULTICAST +/* + * Deliver a datagram to broadcast/multicast sockets. + */ + +struct sock *get_sock_mcast(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned short rnum, unsigned long laddr) +{ + struct sock *s; + unsigned short hnum; + + hnum = ntohs(num); + + /* + * SOCK_ARRAY_SIZE must be a power of two. This will work better + * than a prime unless 3 or more sockets end up using the same + * array entry. 
This should not be a problem because most + * well known sockets don't overlap that much, and for + * the other ones, we can just be careful about picking our + * socket number when we choose an arbitrary one. + */ + + s=sk; + + for(; s != NULL; s = s->next) + { + if (s->num != hnum) + continue; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->daddr && s->daddr!=raddr) + continue; + if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) + continue; + if(s->saddr && s->saddr!=laddr) + continue; + return(s); + } + return(NULL); +} + +#endif + +static struct proto_ops inet_proto_ops = { + AF_INET, + + inet_create, + inet_dup, + inet_release, + inet_bind, + inet_connect, + inet_socketpair, + inet_accept, + inet_getname, + inet_read, + inet_write, + inet_select, + inet_ioctl, + inet_listen, + inet_send, + inet_recv, + inet_sendto, + inet_recvfrom, + inet_shutdown, + inet_setsockopt, + inet_getsockopt, + inet_fcntl, +}; + +extern unsigned long seq_offset; + +/* + * Called by socket.c on kernel startup. + */ + +void inet_proto_init(struct net_proto *pro) +{ + struct inet_protocol *p; + int i; + + + printk("Swansea University Computer Society TCP/IP for NET3.019\n"); + + /* + * Tell SOCKET that we are alive... + */ + + (void) sock_register(inet_proto_ops.family, &inet_proto_ops); + + seq_offset = CURRENT_TIME*250; + + /* + * Add all the protocols. 
+ */ + + for(i = 0; i < SOCK_ARRAY_SIZE; i++) + { + tcp_prot.sock_array[i] = NULL; + udp_prot.sock_array[i] = NULL; + raw_prot.sock_array[i] = NULL; + } + tcp_prot.inuse = 0; + tcp_prot.highestinuse = 0; + udp_prot.inuse = 0; + udp_prot.highestinuse = 0; + raw_prot.inuse = 0; + raw_prot.highestinuse = 0; + + printk("IP Protocols: "); + for(p = inet_protocol_base; p != NULL;) + { + struct inet_protocol *tmp = (struct inet_protocol *) p->next; + inet_add_protocol(p); + printk("%s%s",p->name,tmp?", ":"\n"); + p = tmp; + } + /* + * Set the ARP module up + */ + arp_init(); + /* + * Set the IP module up + */ + ip_init(); +} + diff --git a/pfinet/linux-inet/arp.c b/pfinet/linux-inet/arp.c new file mode 100644 index 00000000..5e00caf7 --- /dev/null +++ b/pfinet/linux-inet/arp.c @@ -0,0 +1,1295 @@ +/* linux/net/inet/arp.c + * + * Copyright (C) 1994 by Florian La Roche + * + * This module implements the Address Resolution Protocol ARP (RFC 826), + * which is used to convert IP addresses (or in the future maybe other + * high-level addresses into a low-level hardware address (like an Ethernet + * address). + * + * FIXME: + * Experiment with better retransmit timers + * Clean up the timer deletions + * If you create a proxy entry set your interface address to the address + * and then delete it, proxies may get out of sync with reality - check this + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * + * Fixes: + * Alan Cox : Removed the ethernet assumptions in Florian's code + * Alan Cox : Fixed some small errors in the ARP logic + * Alan Cox : Allow >4K in /proc + * Alan Cox : Make ARP add its own protocol entry + * + * Ross Martin : Rewrote arp_rcv() and arp_get_info() + * Stephen Henson : Add AX25 support to arp_get_info() + * Alan Cox : Drop data when a device is downed. + * Alan Cox : Use init_timer(). + * Alan Cox : Double lock fixes. + * Martin Seine : Move the arphdr structure + * to if_arp.h for compatibility. + * with BSD based programs. + * Andrew Tridgell : Added ARP netmask code and + * re-arranged proxy handling. + * Alan Cox : Changed to use notifiers. + * Niibe Yutaka : Reply for this device or proxies only. + * Alan Cox : Don't proxy across hardware types! + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/if_arp.h> +#include <linux/in.h> +#include <linux/mm.h> +#include <asm/system.h> +#include <asm/segment.h> +#include <stdarg.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include "ip.h" +#include "route.h" +#include "protocol.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "arp.h" +#ifdef CONFIG_AX25 +#include "ax25.h" +#endif + + +/* + * This structure defines the ARP mapping cache. As long as we make changes + * in this structure, we keep interrupts of. But normally we can copy the + * hardware address and the device pointer in a local variable and then make + * any "long calls" to send a packet out. 
+ */
+
+struct arp_table	/* One cached IP-address to hardware-address mapping. */
+{
+	struct arp_table		*next;			/* Next entry in the same hash chain		*/
+	unsigned long			last_used;		/* jiffies value of the last use; drives expiry	*/
+	unsigned int			flags;			/* Control status: ATF_COM, ATF_PERM, ATF_PUBL	*/
+	unsigned long			ip;			/* IP address of the entry			*/
+	unsigned long			mask;			/* Netmask - used for generalised proxy arps (tridge) */
+	unsigned char			ha[MAX_ADDR_LEN];	/* Hardware address				*/
+	unsigned char			hlen;			/* Length of hardware address			*/
+	unsigned short			htype;			/* Type of hardware in use			*/
+	struct device			*dev;			/* Device the entry is tied to			*/
+
+	/*
+	 *	The following entries are only used for unresolved hw addresses.
+	 */
+
+	struct timer_list		timer;			/* Expire/retransmit timer			*/
+	int				retries;		/* Remaining retries				*/
+	struct sk_buff_head		skb;			/* Packets queued until the entry resolves	*/
+};
+
+
+/*
+ *	Configurable Parameters (don't touch unless you know what you are doing).
+ */
+
+/*
+ *	If an arp request is sent, ARP_RES_TIME is the timeout value until the
+ *	next request is sent.
+ */
+
+#define ARP_RES_TIME		(250*(HZ/10))
+
+/*
+ *	The number of times an arp request is sent, until the host is
+ *	considered unreachable.
+ */
+
+#define ARP_MAX_TRIES		3
+
+/*
+ *	After that time, an unused entry is deleted from the arp table.
+ */
+
+#define ARP_TIMEOUT		(600*HZ)
+
+/*
+ *	How often the function 'arp_check_expire' is called.
+ *	An entry is invalidated in the time between ARP_TIMEOUT and
+ *	(ARP_TIMEOUT+ARP_CHECK_INTERVAL).
+ */
+
+#define ARP_CHECK_INTERVAL	(60 * HZ)
+
+enum proxy {
+   PROXY_EXACT=0,	/* arp_lookup: proxy entries must match the IP exactly	*/
+   PROXY_ANY,		/* arp_lookup: proxy entries match through their netmask	*/
+   PROXY_NONE,		/* arp_lookup: the proxy list is not searched at all	*/
+};
+
+/* Forward declarations. */
+static void arp_check_expire (unsigned long);
+static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy);
+
+
+static struct timer_list arp_timer =	/* cache-expiry timer; arp_check_expire() re-arms it on every run */
+	{ NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };
+
+/*
+ * The default arp netmask is just 255.255.255.255 which means it's
+ * a single machine entry. Only proxy entries can have other netmasks.
+ *
+*/
+
+#define DEF_ARP_NETMASK (~0)
+
+
+/*
+ *	The size of the hash table. 
Must be a power of two. + * Maybe we should remove hashing in the future for arp and concentrate + * on Patrick Schaaf's Host-Cache-Lookup... + */ + + +#define ARP_TABLE_SIZE 16 + +/* The ugly +1 here is to cater for proxy entries. They are put in their + own list for efficiency of lookup. If you don't want to find a proxy + entry then don't look in the last entry, otherwise do +*/ + +#define FULL_ARP_TABLE_SIZE (ARP_TABLE_SIZE+1) + +struct arp_table *arp_tables[FULL_ARP_TABLE_SIZE] = +{ + NULL, +}; + + +/* + * The last bits in the IP address are used for the cache lookup. + * A special entry is used for proxy arp entries + */ + +#define HASH(paddr) (htonl(paddr) & (ARP_TABLE_SIZE - 1)) +#define PROXY_HASH ARP_TABLE_SIZE + +/* + * Check if there are too old entries and remove them. If the ATF_PERM + * flag is set, they are always left in the arp cache (permanent entry). + * Note: Only fully resolved entries, which don't have any packets in + * the queue, can be deleted, since ARP_TIMEOUT is much greater than + * ARP_MAX_TRIES*ARP_RES_TIME. + */ + +static void arp_check_expire(unsigned long dummy) +{ + int i; + unsigned long now = jiffies; + unsigned long flags; + save_flags(flags); + cli(); + + for (i = 0; i < FULL_ARP_TABLE_SIZE; i++) + { + struct arp_table *entry; + struct arp_table **pentry = &arp_tables[i]; + + while ((entry = *pentry) != NULL) + { + if ((now - entry->last_used) > ARP_TIMEOUT + && !(entry->flags & ATF_PERM)) + { + *pentry = entry->next; /* remove from list */ + del_timer(&entry->timer); /* Paranoia */ + kfree_s(entry, sizeof(struct arp_table)); + } + else + pentry = &entry->next; /* go to next entry */ + } + } + restore_flags(flags); + + /* + * Set the timer again. + */ + + del_timer(&arp_timer); + arp_timer.expires = ARP_CHECK_INTERVAL; + add_timer(&arp_timer); +} + + +/* + * Release all linked skb's and the memory for this entry. 
+ */ + +static void arp_release_entry(struct arp_table *entry) +{ + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + /* Release the list of `skb' pointers. */ + while ((skb = skb_dequeue(&entry->skb)) != NULL) + { + skb_device_lock(skb); + restore_flags(flags); + dev_kfree_skb(skb, FREE_WRITE); + } + restore_flags(flags); + del_timer(&entry->timer); + kfree_s(entry, sizeof(struct arp_table)); + return; +} + +/* + * Purge a device from the ARP queue + */ + +int arp_device_event(unsigned long event, void *ptr) +{ + struct device *dev=ptr; + int i; + unsigned long flags; + + if(event!=NETDEV_DOWN) + return NOTIFY_DONE; + /* + * This is a bit OTT - maybe we need some arp semaphores instead. + */ + + save_flags(flags); + cli(); + for (i = 0; i < FULL_ARP_TABLE_SIZE; i++) + { + struct arp_table *entry; + struct arp_table **pentry = &arp_tables[i]; + + while ((entry = *pentry) != NULL) + { + if(entry->dev==dev) + { + *pentry = entry->next; /* remove from list */ + del_timer(&entry->timer); /* Paranoia */ + kfree_s(entry, sizeof(struct arp_table)); + } + else + pentry = &entry->next; /* go to next entry */ + } + } + restore_flags(flags); + return NOTIFY_DONE; +} + + +/* + * Create and send an arp packet. If (dest_hw == NULL), we create a broadcast + * message. + */ + +void arp_send(int type, int ptype, unsigned long dest_ip, + struct device *dev, unsigned long src_ip, + unsigned char *dest_hw, unsigned char *src_hw) +{ + struct sk_buff *skb; + struct arphdr *arp; + unsigned char *arp_ptr; + + /* + * No arp on this interface. 
+ */ + + if(dev->flags&IFF_NOARP) + return; + + /* + * Allocate a buffer + */ + + skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) + + dev->hard_header_len, GFP_ATOMIC); + if (skb == NULL) + { + printk("ARP: no memory to send an arp packet\n"); + return; + } + skb->len = sizeof(struct arphdr) + dev->hard_header_len + 2*(dev->addr_len+4); + skb->arp = 1; + skb->dev = dev; + skb->free = 1; + + /* + * Fill the device header for the ARP frame + */ + + dev->hard_header(skb->data,dev,ptype,dest_hw?dest_hw:dev->broadcast,src_hw?src_hw:NULL,skb->len,skb); + + /* Fill out the arp protocol part. */ + arp = (struct arphdr *) (skb->data + dev->hard_header_len); + arp->ar_hrd = htons(dev->type); +#ifdef CONFIG_AX25 + arp->ar_pro = (dev->type != ARPHRD_AX25)? htons(ETH_P_IP) : htons(AX25_P_IP); +#else + arp->ar_pro = htons(ETH_P_IP); +#endif + arp->ar_hln = dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr=(unsigned char *)(arp+1); + + memcpy(arp_ptr, src_hw, dev->addr_len); + arp_ptr+=dev->addr_len; + memcpy(arp_ptr, &src_ip,4); + arp_ptr+=4; + if (dest_hw != NULL) + memcpy(arp_ptr, dest_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr+=dev->addr_len; + memcpy(arp_ptr, &dest_ip, 4); + + dev_queue_xmit(skb, dev, 0); +} + + +/* + * This function is called, if an entry is not resolved in ARP_RES_TIME. + * Either resend a request, or give it up and free the entry. + */ + +static void arp_expire_request (unsigned long arg) +{ + struct arp_table *entry = (struct arp_table *) arg; + struct arp_table **pentry; + unsigned long hash; + unsigned long flags; + + save_flags(flags); + cli(); + + /* + * Since all timeouts are handled with interrupts enabled, there is a + * small chance, that this entry has just been resolved by an incoming + * packet. This is the only race condition, but it is handled... 
+ */ + + if (entry->flags & ATF_COM) + { + restore_flags(flags); + return; + } + + if (--entry->retries > 0) + { + unsigned long ip = entry->ip; + struct device *dev = entry->dev; + + /* Set new timer. */ + del_timer(&entry->timer); + entry->timer.expires = ARP_RES_TIME; + add_timer(&entry->timer); + restore_flags(flags); + arp_send(ARPOP_REQUEST, ETH_P_ARP, ip, dev, dev->pa_addr, + NULL, dev->dev_addr); + return; + } + + /* + * Arp request timed out. Delete entry and all waiting packets. + * If we give each entry a pointer to itself, we don't have to + * loop through everything again. Maybe hash is good enough, but + * I will look at it later. + */ + + hash = HASH(entry->ip); + + /* proxy entries shouldn't really time out so this is really + only here for completeness + */ + if (entry->flags & ATF_PUBL) + pentry = &arp_tables[PROXY_HASH]; + else + pentry = &arp_tables[hash]; + while (*pentry != NULL) + { + if (*pentry == entry) + { + *pentry = entry->next; /* delete from linked list */ + del_timer(&entry->timer); + restore_flags(flags); + arp_release_entry(entry); + return; + } + pentry = &(*pentry)->next; + } + restore_flags(flags); + printk("Possible ARP queue corruption.\n"); + /* + * We should never arrive here. + */ +} + + +/* + * This will try to retransmit everything on the queue. 
+ */ + +static void arp_send_q(struct arp_table *entry, unsigned char *hw_dest) +{ + struct sk_buff *skb; + + unsigned long flags; + + /* + * Empty the entire queue, building its data up ready to send + */ + + if(!(entry->flags&ATF_COM)) + { + printk("arp_send_q: incomplete entry for %s\n", + in_ntoa(entry->ip)); + return; + } + + save_flags(flags); + + cli(); + while((skb = skb_dequeue(&entry->skb)) != NULL) + { + IS_SKB(skb); + skb_device_lock(skb); + restore_flags(flags); + if(!skb->dev->rebuild_header(skb->data,skb->dev,skb->raddr,skb)) + { + skb->arp = 1; + if(skb->sk==NULL) + dev_queue_xmit(skb, skb->dev, 0); + else + dev_queue_xmit(skb,skb->dev,skb->sk->priority); + } + else + { + /* This routine is only ever called when 'entry' is + complete. Thus this can't fail. */ + printk("arp_send_q: The impossible occurred. Please notify Alan.\n"); + printk("arp_send_q: active entity %s\n",in_ntoa(entry->ip)); + printk("arp_send_q: failed to find %s\n",in_ntoa(skb->raddr)); + } + } + restore_flags(flags); +} + + +/* + * Delete an ARP mapping entry in the cache. + */ + +void arp_destroy(unsigned long ip_addr, int force) +{ + int checked_proxies = 0; + struct arp_table *entry; + struct arp_table **pentry; + unsigned long hash = HASH(ip_addr); + +ugly: + cli(); + pentry = &arp_tables[hash]; + if (! *pentry) /* also check proxy entries */ + pentry = &arp_tables[PROXY_HASH]; + + while ((entry = *pentry) != NULL) + { + if (entry->ip == ip_addr) + { + if ((entry->flags & ATF_PERM) && !force) + return; + *pentry = entry->next; + del_timer(&entry->timer); + sti(); + arp_release_entry(entry); + /* this would have to be cleaned up */ + goto ugly; + /* perhaps like this ? + cli(); + entry = *pentry; + */ + } + pentry = &entry->next; + if (!checked_proxies && ! *pentry) + { /* ugly. we have to make sure we check proxy + entries as well */ + checked_proxies = 1; + pentry = &arp_tables[PROXY_HASH]; + } + } + sti(); +} + + +/* + * Receive an arp request by the device layer. 
Maybe I rewrite it, to + * use the incoming packet for the reply. The time for the current + * "overhead" isn't that high... + */ + +int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ +/* + * We shouldn't use this type conversion. Check later. + */ + + struct arphdr *arp = (struct arphdr *)skb->h.raw; + unsigned char *arp_ptr= (unsigned char *)(arp+1); + struct arp_table *entry; + struct arp_table *proxy_entry; + int addr_hint,hlen,htype; + unsigned long hash; + unsigned char ha[MAX_ADDR_LEN]; /* So we can enable ints again. */ + long sip,tip; + unsigned char *sha,*tha; + +/* + * The hardware length of the packet should match the hardware length + * of the device. Similarly, the hardware types should match. The + * device should be ARP-able. Also, if pln is not 4, then the lookup + * is not from an IP number. We can't currently handle this, so toss + * it. + */ + if (arp->ar_hln != dev->addr_len || + dev->type != ntohs(arp->ar_hrd) || + dev->flags & IFF_NOARP || + arp->ar_pln != 4) + { + kfree_skb(skb, FREE_READ); + return 0; + } + +/* + * Another test. + * The logic here is that the protocol being looked up by arp should + * match the protocol the device speaks. If it doesn't, there is a + * problem, so toss the packet. + */ + switch(dev->type) + { +#ifdef CONFIG_AX25 + case ARPHRD_AX25: + if(arp->ar_pro != htons(AX25_P_IP)) + { + kfree_skb(skb, FREE_READ); + return 0; + } + break; +#endif + case ARPHRD_ETHER: + case ARPHRD_ARCNET: + if(arp->ar_pro != htons(ETH_P_IP)) + { + kfree_skb(skb, FREE_READ); + return 0; + } + break; + + default: + printk("ARP: dev->type mangled!\n"); + kfree_skb(skb, FREE_READ); + return 0; + } + +/* + * Extract fields + */ + + hlen = dev->addr_len; + htype = dev->type; + + sha=arp_ptr; + arp_ptr+=hlen; + memcpy(&sip,arp_ptr,4); + arp_ptr+=4; + tha=arp_ptr; + arp_ptr+=hlen; + memcpy(&tip,arp_ptr,4); + +/* + * Check for bad requests for 127.0.0.1. If this is one such, delete it. 
+ */ + if(tip == INADDR_LOOPBACK) + { + kfree_skb(skb, FREE_READ); + return 0; + } + +/* + * Process entry. The idea here is we want to send a reply if it is a + * request for us or if it is a request for someone else that we hold + * a proxy for. We want to add an entry to our cache if it is a reply + * to us or if it is a request for our address. + * (The assumption for this last is that if someone is requesting our + * address, they are probably intending to talk to us, so it saves time + * if we cache their address. Their address is also probably not in + * our cache, since ours is not in their cache.) + * + * Putting this another way, we only care about replies if they are to + * us, in which case we add them to the cache. For requests, we care + * about those for us and those for our proxies. We reply to both, + * and in the case of requests for us we add the requester to the arp + * cache. + */ + + addr_hint = ip_chk_addr(tip); + + if(arp->ar_op == htons(ARPOP_REPLY)) + { + if(addr_hint!=IS_MYADDR) + { +/* + * Replies to other machines get tossed. + */ + kfree_skb(skb, FREE_READ); + return 0; + } +/* + * Fall through to code below that adds sender to cache. + */ + } + else + { +/* + * It is now an arp request + */ +/* + * Only reply for the real device address or when it's in our proxy tables + */ + if(tip!=dev->pa_addr) + { +/* + * To get in here, it is a request for someone else. We need to + * check if that someone else is one of our proxies. If it isn't, + * we can toss it. + */ + cli(); + for(proxy_entry=arp_tables[PROXY_HASH]; + proxy_entry; + proxy_entry = proxy_entry->next) + { + /* we will respond to a proxy arp request + if the masked arp table ip matches the masked + tip. This allows a single proxy arp table + entry to be used on a gateway machine to handle + all requests for a whole network, rather than + having to use a huge number of proxy arp entries + and having to keep them uptodate. 
+ */ + if (proxy_entry->dev != dev && proxy_entry->htype == htype && + !((proxy_entry->ip^tip)&proxy_entry->mask)) + break; + + } + if (proxy_entry) + { + memcpy(ha, proxy_entry->ha, hlen); + sti(); + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha); + kfree_skb(skb, FREE_READ); + return 0; + } + else + { + sti(); + kfree_skb(skb, FREE_READ); + return 0; + } + } + else + { +/* + * To get here, it must be an arp request for us. We need to reply. + */ + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr); + } + } + + +/* + * Now all replies are handled. Next, anything that falls through to here + * needs to be added to the arp cache, or have its entry updated if it is + * there. + */ + + hash = HASH(sip); + cli(); + for(entry=arp_tables[hash];entry;entry=entry->next) + if(entry->ip==sip && entry->htype==htype) + break; + + if(entry) + { +/* + * Entry found; update it. + */ + memcpy(entry->ha, sha, hlen); + entry->hlen = hlen; + entry->last_used = jiffies; + if (!(entry->flags & ATF_COM)) + { +/* + * This entry was incomplete. Delete the retransmit timer + * and switch to complete status. + */ + del_timer(&entry->timer); + entry->flags |= ATF_COM; + sti(); +/* + * Send out waiting packets. We might have problems, if someone is + * manually removing entries right now -- entry might become invalid + * underneath us. + */ + arp_send_q(entry, sha); + } + else + { + sti(); + } + } + else + { +/* + * No entry found. Need to add a new entry to the arp table. 
+ */ + entry = (struct arp_table *)kmalloc(sizeof(struct arp_table),GFP_ATOMIC); + if(entry == NULL) + { + sti(); + printk("ARP: no memory for new arp entry\n"); + + kfree_skb(skb, FREE_READ); + return 0; + } + + entry->mask = DEF_ARP_NETMASK; + entry->ip = sip; + entry->hlen = hlen; + entry->htype = htype; + entry->flags = ATF_COM; + init_timer(&entry->timer); + memcpy(entry->ha, sha, hlen); + entry->last_used = jiffies; + entry->dev = skb->dev; + skb_queue_head_init(&entry->skb); + entry->next = arp_tables[hash]; + arp_tables[hash] = entry; + sti(); + } + +/* + * Replies have been sent, and entries have been added. All done. + */ + kfree_skb(skb, FREE_READ); + return 0; +} + + +/* + * Find an arp mapping in the cache. If not found, post a request. + */ + +int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev, + unsigned long saddr, struct sk_buff *skb) +{ + struct arp_table *entry; + unsigned long hash; +#ifdef CONFIG_IP_MULTICAST + unsigned long taddr; +#endif + + switch (ip_chk_addr(paddr)) + { + case IS_MYADDR: + printk("ARP: arp called for own IP address\n"); + memcpy(haddr, dev->dev_addr, dev->addr_len); + skb->arp = 1; + return 0; +#ifdef CONFIG_IP_MULTICAST + case IS_MULTICAST: + if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802) + { + haddr[0]=0x01; + haddr[1]=0x00; + haddr[2]=0x5e; + taddr=ntohl(paddr); + haddr[5]=taddr&0xff; + taddr=taddr>>8; + haddr[4]=taddr&0xff; + taddr=taddr>>8; + haddr[3]=taddr&0x7f; + return 0; + } + /* + * If a device does not support multicast broadcast the stuff (eg AX.25 for now) + */ +#endif + + case IS_BROADCAST: + memcpy(haddr, dev->broadcast, dev->addr_len); + skb->arp = 1; + return 0; + } + + hash = HASH(paddr); + cli(); + + /* + * Find an entry + */ + entry = arp_lookup(paddr, PROXY_NONE); + + if (entry != NULL) /* It exists */ + { + if (!(entry->flags & ATF_COM)) + { + /* + * A request was already send, but no reply yet. 
Thus + * queue the packet with the previous attempt + */ + + if (skb != NULL) + { + skb_queue_tail(&entry->skb, skb); + skb_device_unlock(skb); + } + sti(); + return 1; + } + + /* + * Update the record + */ + + entry->last_used = jiffies; + memcpy(haddr, entry->ha, dev->addr_len); + if (skb) + skb->arp = 1; + sti(); + return 0; + } + + /* + * Create a new unresolved entry. + */ + + entry = (struct arp_table *) kmalloc(sizeof(struct arp_table), + GFP_ATOMIC); + if (entry != NULL) + { + entry->mask = DEF_ARP_NETMASK; + entry->ip = paddr; + entry->hlen = dev->addr_len; + entry->htype = dev->type; + entry->flags = 0; + memset(entry->ha, 0, dev->addr_len); + entry->dev = dev; + entry->last_used = jiffies; + init_timer(&entry->timer); + entry->timer.function = arp_expire_request; + entry->timer.data = (unsigned long)entry; + entry->timer.expires = ARP_RES_TIME; + entry->next = arp_tables[hash]; + arp_tables[hash] = entry; + add_timer(&entry->timer); + entry->retries = ARP_MAX_TRIES; + skb_queue_head_init(&entry->skb); + if (skb != NULL) + { + skb_queue_tail(&entry->skb, skb); + skb_device_unlock(skb); + } + } + else + { + if (skb != NULL && skb->free) + kfree_skb(skb, FREE_WRITE); + } + sti(); + + /* + * If we didn't find an entry, we will try to send an ARP packet. + */ + + arp_send(ARPOP_REQUEST, ETH_P_ARP, paddr, dev, saddr, NULL, + dev->dev_addr); + + return 1; +} + + +/* + * Write the contents of the ARP cache to a PROCfs file. + */ + +#define HBUFFERLEN 30 + +int arp_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len=0; + off_t begin=0; + off_t pos=0; + int size; + struct arp_table *entry; + char hbuffer[HBUFFERLEN]; + int i,j,k; + const char hexbuf[] = "0123456789ABCDEF"; + + size = sprintf(buffer,"IP address HW type Flags HW address Mask\n"); + + pos+=size; + len+=size; + + cli(); + for(i=0; i<FULL_ARP_TABLE_SIZE; i++) + { + for(entry=arp_tables[i]; entry!=NULL; entry=entry->next) + { +/* + * Convert hardware address to XX:XX:XX:XX ... 
form. + */ +#ifdef CONFIG_AX25 + + if(entry->htype==ARPHRD_AX25) + strcpy(hbuffer,ax2asc((ax25_address *)entry->ha)); + else { +#endif + + for(k=0,j=0;k<HBUFFERLEN-3 && j<entry->hlen;j++) + { + hbuffer[k++]=hexbuf[ (entry->ha[j]>>4)&15 ]; + hbuffer[k++]=hexbuf[ entry->ha[j]&15 ]; + hbuffer[k++]=':'; + } + hbuffer[--k]=0; + +#ifdef CONFIG_AX25 + } +#endif + size = sprintf(buffer+len, + "%-17s0x%-10x0x%-10x%s", + in_ntoa(entry->ip), + (unsigned int)entry->htype, + entry->flags, + hbuffer); + size += sprintf(buffer+len+size, + " %-17s\n", + entry->mask==DEF_ARP_NETMASK? + "*":in_ntoa(entry->mask)); + + len+=size; + pos=begin+len; + + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + } + sti(); + + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Start slop */ + if(len>length) + len=length; /* Ending slop */ + return len; +} + + +/* + * This will find an entry in the ARP table by looking at the IP address. + * If proxy is PROXY_EXACT then only exact IP matches will be allowed + * for proxy entries, otherwise the netmask will be used + */ + +static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy) +{ + struct arp_table *entry; + unsigned long hash = HASH(paddr); + + for (entry = arp_tables[hash]; entry != NULL; entry = entry->next) + if (entry->ip == paddr) break; + + /* it's possibly a proxy entry (with a netmask) */ + if (!entry && proxy != PROXY_NONE) + for (entry=arp_tables[PROXY_HASH]; entry != NULL; entry = entry->next) + if ((proxy==PROXY_EXACT) ? (entry->ip==paddr) + : !((entry->ip^paddr)&entry->mask)) + break; + + return entry; +} + + +/* + * Set (create) an ARP cache entry. + */ + +static int arp_req_set(struct arpreq *req) +{ + struct arpreq r; + struct arp_table *entry; + struct sockaddr_in *si; + int htype, hlen; + unsigned long ip; + struct rtable *rt; + + memcpy_fromfs(&r, req, sizeof(r)); + + /* We only understand about IP addresses... 
*/ + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + + /* + * Find out about the hardware type. + * We have to be compatible with BSD UNIX, so we have to + * assume that a "not set" value (i.e. 0) means Ethernet. + */ + + switch (r.arp_ha.sa_family) { + case ARPHRD_ETHER: + htype = ARPHRD_ETHER; + hlen = ETH_ALEN; + break; + + case ARPHRD_ARCNET: + htype = ARPHRD_ARCNET; + hlen = 1; /* length of arcnet addresses */ + break; + +#ifdef CONFIG_AX25 + case ARPHRD_AX25: + htype = ARPHRD_AX25; + hlen = 7; + break; +#endif + default: + return -EPFNOSUPPORT; + } + + si = (struct sockaddr_in *) &r.arp_pa; + ip = si->sin_addr.s_addr; + if (ip == 0) + { + printk("ARP: SETARP: requested PA is 0.0.0.0 !\n"); + return -EINVAL; + } + + /* + * Is it reachable directly ? + */ + + rt = ip_rt_route(ip, NULL, NULL); + if (rt == NULL) + return -ENETUNREACH; + + /* + * Is there an existing entry for this address? + */ + + cli(); + + /* + * Find the entry + */ + entry = arp_lookup(ip, PROXY_EXACT); + if (entry && (entry->flags & ATF_PUBL) != (r.arp_flags & ATF_PUBL)) + { + sti(); + arp_destroy(ip,1); + cli(); + entry = NULL; + } + + /* + * Do we need to create a new entry + */ + + if (entry == NULL) + { + unsigned long hash = HASH(ip); + if (r.arp_flags & ATF_PUBL) + hash = PROXY_HASH; + + entry = (struct arp_table *) kmalloc(sizeof(struct arp_table), + GFP_ATOMIC); + if (entry == NULL) + { + sti(); + return -ENOMEM; + } + entry->ip = ip; + entry->hlen = hlen; + entry->htype = htype; + init_timer(&entry->timer); + entry->next = arp_tables[hash]; + arp_tables[hash] = entry; + skb_queue_head_init(&entry->skb); + } + /* + * We now have a pointer to an ARP entry. Update it! 
+ */ + + memcpy(&entry->ha, &r.arp_ha.sa_data, hlen); + entry->last_used = jiffies; + entry->flags = r.arp_flags | ATF_COM; + if ((entry->flags & ATF_PUBL) && (entry->flags & ATF_NETMASK)) + { + si = (struct sockaddr_in *) &r.arp_netmask; + entry->mask = si->sin_addr.s_addr; + } + else + entry->mask = DEF_ARP_NETMASK; + entry->dev = rt->rt_dev; + sti(); + + return 0; +} + + +/* + * Get an ARP cache entry. + */ + +static int arp_req_get(struct arpreq *req) +{ + struct arpreq r; + struct arp_table *entry; + struct sockaddr_in *si; + + /* + * We only understand about IP addresses... + */ + + memcpy_fromfs(&r, req, sizeof(r)); + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + + /* + * Is there an existing entry for this address? + */ + + si = (struct sockaddr_in *) &r.arp_pa; + cli(); + entry = arp_lookup(si->sin_addr.s_addr,PROXY_ANY); + + if (entry == NULL) + { + sti(); + return -ENXIO; + } + + /* + * We found it; copy into structure. + */ + + memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen); + r.arp_ha.sa_family = entry->htype; + r.arp_flags = entry->flags; + sti(); + + /* + * Copy the information back + */ + + memcpy_tofs(req, &r, sizeof(r)); + return 0; +} + + +#ifndef _HURD_ +/* + * Handle an ARP layer I/O control request. 
+ */ + +int arp_ioctl(unsigned int cmd, void *arg) +{ + struct arpreq r; + struct sockaddr_in *si; + int err; + + switch(cmd) + { + case SIOCDARP: + if (!suser()) + return -EPERM; + err = verify_area(VERIFY_READ, arg, sizeof(struct arpreq)); + if(err) + return err; + memcpy_fromfs(&r, arg, sizeof(r)); + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + si = (struct sockaddr_in *) &r.arp_pa; + arp_destroy(si->sin_addr.s_addr, 1); + return 0; + case SIOCGARP: + err = verify_area(VERIFY_WRITE, arg, sizeof(struct arpreq)); + if(err) + return err; + return arp_req_get((struct arpreq *)arg); + case SIOCSARP: + if (!suser()) + return -EPERM; + err = verify_area(VERIFY_READ, arg, sizeof(struct arpreq)); + if(err) + return err; + return arp_req_set((struct arpreq *)arg); + default: + return -EINVAL; + } + /*NOTREACHED*/ + return 0; +} +#endif + +/* + * Called once on startup. + */ + +static struct packet_type arp_packet_type = +{ + 0, /* Should be: __constant_htons(ETH_P_ARP) - but this _doesn't_ come out constant! */ + NULL, /* All devices */ + arp_rcv, + NULL, + NULL +}; + +static struct notifier_block arp_dev_notifier={ + arp_device_event, + NULL, + 0 +}; + +void arp_init (void) +{ + /* Register the packet type */ + arp_packet_type.type=htons(ETH_P_ARP); + dev_add_pack(&arp_packet_type); + /* Start with the regular checks for expired arp entries. 
*/ + add_timer(&arp_timer); + /* Register for device down reports */ + register_netdevice_notifier(&arp_dev_notifier); +} + diff --git a/pfinet/linux-inet/arp.h b/pfinet/linux-inet/arp.h new file mode 100644 index 00000000..a68adc30 --- /dev/null +++ b/pfinet/linux-inet/arp.h @@ -0,0 +1,18 @@ +/* linux/net/inet/arp.h */ +#ifndef _ARP_H +#define _ARP_H + +extern void arp_init(void); +extern void arp_destroy(unsigned long paddr, int force); +extern void arp_device_down(struct device *dev); +extern int arp_rcv(struct sk_buff *skb, struct device *dev, + struct packet_type *pt); +extern int arp_find(unsigned char *haddr, unsigned long paddr, + struct device *dev, unsigned long saddr, struct sk_buff *skb); +extern int arp_get_info(char *buffer, char **start, off_t origin, int length); +extern int arp_ioctl(unsigned int cmd, void *arg); +extern void arp_send(int type, int ptype, unsigned long dest_ip, + struct device *dev, unsigned long src_ip, + unsigned char *dest_hw, unsigned char *src_hw); + +#endif /* _ARP_H */ diff --git a/pfinet/linux-inet/datagram.c b/pfinet/linux-inet/datagram.c new file mode 100644 index 00000000..cd248cfb --- /dev/null +++ b/pfinet/linux-inet/datagram.c @@ -0,0 +1,210 @@ +/* + * SUCS NET3: + * + * Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top + * of these would make sense. Not tonight however 8-). + * This is used because UDP, RAW, PACKET and the to be released IPX layer all have identical select code and mostly + * identical recvfrom() code. So we share it here. The select was shared before but buried in udp.c so I moved it. + * + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>. (datagram_select() from old udp.c code) + * + * Fixes: + * Alan Cox : NULL return from skb_peek_copy() understood + * Alan Cox : Rewrote skb_read_datagram to avoid the skb_peek_copy stuff. + * Alan Cox : Added support for SOCK_SEQPACKET. 
IPX can no longer use the SO_TYPE hack but + * AX.25 now works right, and SPX is feasible. + * Alan Cox : Fixed write select of non IP protocol crash. + * Florian La Roche: Changed for my new skbuff handling. + * + * Note: + * A lot of this will change when the protocol/socket separation + * occurs. Using this will make things reasonably clean. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include "route.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" + + +/* + * Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible + * races. This replaces identical code in packet,raw and udp, as well as the yet to + * be released IPX support. It also finally fixes the long standing peek and read + * race for datagram sockets. If you alter this routine remember it must be + * re-entrant. + */ + +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) +{ + struct sk_buff *skb; + unsigned long intflags; + + /* Socket is inuse - so the timer doesn't attack it */ + save_flags(intflags); +restart: + sk->inuse = 1; + while(skb_peek(&sk->receive_queue) == NULL) /* No data */ + { + /* If we are shutdown then no more data is going to appear. We are done */ + if (sk->shutdown & RCV_SHUTDOWN) + { + release_sock(sk); + *err=0; + return NULL; + } + + if(sk->err) + { + release_sock(sk); + *err=-sk->err; + sk->err=0; + return NULL; + } + + /* Sequenced packets can come disconnected. 
If so we report the problem */ + if(sk->type==SOCK_SEQPACKET && sk->state!=TCP_ESTABLISHED) + { + release_sock(sk); + *err=-ENOTCONN; + return NULL; + } + + /* User doesn't want to wait */ + if (noblock) + { + release_sock(sk); + *err=-EAGAIN; + return NULL; + } + release_sock(sk); + + /* Interrupts off so that no packet arrives before we begin sleeping. + Otherwise we might miss our wake up */ + cli(); + if (skb_peek(&sk->receive_queue) == NULL) + { + interruptible_sleep_on(sk->sleep); + /* Signals may need a restart of the syscall */ + if (current->signal & ~current->blocked) + { + restore_flags(intflags);; + *err=-ERESTARTSYS; + return(NULL); + } + if(sk->err != 0) /* Error while waiting for packet + eg an icmp sent earlier by the + peer has finally turned up now */ + { + *err = -sk->err; + sk->err=0; + restore_flags(intflags); + return NULL; + } + } + sk->inuse = 1; + restore_flags(intflags); + } + /* Again only user level code calls this function, so nothing interrupt level + will suddenly eat the receive_queue */ + if (!(flags & MSG_PEEK)) + { + skb=skb_dequeue(&sk->receive_queue); + if(skb!=NULL) + skb->users++; + else + goto restart; /* Avoid race if someone beats us to the data */ + } + else + { + cli(); + skb=skb_peek(&sk->receive_queue); + if(skb!=NULL) + skb->users++; + restore_flags(intflags); + if(skb==NULL) /* shouldn't happen but .. 
*/ + *err=-EAGAIN; + } + return skb; +} + +void skb_free_datagram(struct sk_buff *skb) +{ + unsigned long flags; + + save_flags(flags); + cli(); + skb->users--; + if(skb->users>0) + { + restore_flags(flags); + return; + } + /* See if it needs destroying */ + if(!skb->next && !skb->prev) /* Been dequeued by someone - ie it's read */ + kfree_skb(skb,FREE_READ); + restore_flags(flags); +} + +void skb_copy_datagram(struct sk_buff *skb, int offset, char *to, int size) +{ + /* We will know all about the fraglist options to allow >4K receives + but not this release */ + memcpy_tofs(to,skb->h.raw+offset,size); +} + +/* + * Datagram select: Again totally generic. Moved from udp.c + * Now does seqpacket. + */ + +int datagram_select(struct sock *sk, int sel_type, select_table *wait) +{ + select_wait(sk->sleep, wait); + switch(sel_type) + { + case SEL_IN: + if (sk->type==SOCK_SEQPACKET && sk->state==TCP_CLOSE) + { + /* Connection closed: Wake up */ + return(1); + } + if (skb_peek(&sk->receive_queue) != NULL || sk->err != 0) + { /* This appears to be consistent + with other stacks */ + return(1); + } + return(0); + + case SEL_OUT: + if (sk->prot && sk->prot->wspace(sk) >= MIN_WRITE_SPACE) + { + return(1); + } + if (sk->prot==NULL && sk->sndbuf-sk->wmem_alloc >= MIN_WRITE_SPACE) + { + return(1); + } + return(0); + + case SEL_EX: + if (sk->err) + return(1); /* Socket has gone into error state (eg icmp error) */ + return(0); + } + return(0); +} diff --git a/pfinet/linux-inet/datalink.h b/pfinet/linux-inet/datalink.h new file mode 100644 index 00000000..34ae08da --- /dev/null +++ b/pfinet/linux-inet/datalink.h @@ -0,0 +1,17 @@ +#ifndef _NET_INET_DATALINK_H_ +#define _NET_INET_DATALINK_H_ + +struct datalink_proto { + unsigned short type_len; + unsigned char type[8]; + char *string_name; + unsigned short header_length; + int (*rcvfunc)(struct sk_buff *, struct device *, + struct packet_type *); + void (*datalink_header)(struct datalink_proto *, struct sk_buff *, + unsigned char *); + 
struct datalink_proto *next; +}; + +#endif + diff --git a/pfinet/linux-inet/dev.c b/pfinet/linux-inet/dev.c new file mode 100644 index 00000000..d393af11 --- /dev/null +++ b/pfinet/linux-inet/dev.c @@ -0,0 +1,1449 @@ +/* + * NET3 Protocol independent device support routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Derived from the non IP parts of dev.c 1.0.19 + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * + * Additional Authors: + * Florian la Roche <rzsfl@rz.uni-sb.de> + * Alan Cox <gw4pts@gw4pts.ampr.org> + * David Hinds <dhinds@allegro.stanford.edu> + * + * Changes: + * Alan Cox : device private ioctl copies fields back. + * Alan Cox : Transmit queue code does relevant stunts to + * keep the queue safe. + * Alan Cox : Fixed double lock. + * Alan Cox : Fixed promisc NULL pointer trap + * ???????? : Support the full private ioctl range + * Alan Cox : Moved ioctl permission check into drivers + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI + * Alan Cox : 100 backlog just doesn't cut it when + * you start doing multicast video 8) + * Alan Cox : Rewrote net_bh and list manager. + * Alan Cox : Fix ETH_P_ALL echoback lengths. + * + * Cleaned up and recommented by Alan Cox 2nd April 1994. I hope to have + * the rest as well commented in the end. 
*/

/*
 *	A lot of these includes will be going walkies very soon
 */

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include "ip.h"
#include "route.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "arp.h"


/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Head of a singly linked list chained through packet_type.next:
 *	dev_add_pack() pushes new handlers on the front, dev_remove_pack()
 *	unlinks them, and net_bh()/dev_queue_xmit() walk it to deliver frames.
 */

struct packet_type *ptype_base = NULL;

/*
 *	Our notifier list. Handed to notifier_chain_register(),
 *	notifier_chain_unregister() and notifier_call_chain() by the
 *	routines below (NETDEV_UP / NETDEV_DOWN events).
 */

struct notifier_block *netdev_chain=NULL;

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the bottom half handler.
 *
 *	Both pointer members are initialised to the address of the head
 *	itself, i.e. the queue starts out empty (presumably these are the
 *	next/prev links of a circular list -- see <linux/skbuff.h>).
 */

static struct sk_buff_head backlog =
{
	(struct sk_buff *)&backlog, (struct sk_buff *)&backlog
#ifdef CONFIG_SKB_CHECK
	,SK_HEAD_SKB
#endif
};

/*
 *	We don't overdo the queue or we will thrash memory badly.
 *
 *	Number of frames currently on 'backlog': incremented by netif_rx(),
 *	decremented by net_bh(). netif_rx() starts dropping once this
 *	exceeds 300 and resumes only when it is back at zero.
 */

static int backlog_size = 0;

/*
 *	Return the lesser of the two values.
 *
 *	Both operands are unsigned long, so a signed argument is converted
 *	to unsigned before the comparison (a negative value compares as a
 *	very large one).
 */

static __inline__ unsigned long min(unsigned long a, unsigned long b)
{
	return (a < b)? a : b;
}


/******************************************************************************************

		Protocol management and registration routines

*******************************************************************************************/

/*
 *	For efficiency
 *
 *	Count of registered handlers whose type is htons(ETH_P_ALL)
 *	(maintained by dev_add_pack()/dev_remove_pack()). dev_queue_xmit()
 *	uses it to stop scanning ptype_base once every such handler has
 *	been given a copy of the outgoing frame.
 */

static int dev_nit=0;

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
*/

/*
 *	dev_add_pack - push 'pt' on the front of the ptype_base list.
 *	Handlers registered for htons(ETH_P_ALL) are also counted in dev_nit.
 *	No locking is done here.
 */
void dev_add_pack(struct packet_type *pt)
{
	if(pt->type==htons(ETH_P_ALL))
		dev_nit++;
	pt->next = ptype_base;
	ptype_base = pt;
}


/*
 *	Remove a protocol ID from the list.
 *
 *	Walks ptype_base with a pointer-to-link so the head needs no special
 *	case. NOTE(review): dev_nit is decremented for an ETH_P_ALL handler
 *	before the search, i.e. even if 'pt' turns out not to be on the list.
 */

void dev_remove_pack(struct packet_type *pt)
{
	struct packet_type **pt1;
	if(pt->type==htons(ETH_P_ALL))
		dev_nit--;
	for(pt1=&ptype_base; (*pt1)!=NULL; pt1=&((*pt1)->next))
	{
		if(pt==(*pt1))
		{
			*pt1=pt->next;
			return;
		}
	}
}

/*****************************************************************************************

			    Device Interface Subroutines

******************************************************************************************/

/*
 *	Find an interface by name.
 *
 *	Linear search of the dev_base list using an exact strcmp() match.
 *	Returns the device, or NULL if no device has that name.
 */

struct device *dev_get(char *name)
{
	struct device *dev;

	for (dev = dev_base; dev != NULL; dev = dev->next)
	{
		if (strcmp(dev->name, name) == 0)
			return(dev);
	}
	return(NULL);
}


/*
 *	Prepare an interface for use.
 *
 *	Returns whatever the driver's open method returned (0 when the
 *	driver has no open method). Only on 0 is the device marked
 *	IFF_UP|IFF_RUNNING, its multicast list uploaded and NETDEV_UP
 *	broadcast on netdev_chain.
 */

int dev_open(struct device *dev)
{
	int ret = 0;

	/*
	 *	Call device private open method
	 */
	if (dev->open)
		ret = dev->open(dev);

	/*
	 *	If it went open OK then set the flags
	 */

	if (ret == 0)
	{
		dev->flags |= (IFF_UP | IFF_RUNNING);
		/*
		 *	Initialise multicasting status
		 */
#ifdef CONFIG_IP_MULTICAST
		/*
		 *	Join the all host group
		 */
		ip_mc_allhost(dev);
#endif
		dev_mc_upload(dev);
		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return(ret);
}


/*
 *	Completely shutdown an interface.
 *
 *	Does nothing unless some flag is set on the device. Otherwise clears
 *	ALL flags (not just IFF_UP), calls the driver's stop method,
 *	broadcasts NETDEV_DOWN, discards the multicast list, zeroes the
 *	protocol addresses and empties every transmit queue. Always
 *	returns 0.
 */

int dev_close(struct device *dev)
{
	/*
	 *	Only close a device if it is up.
	 */

	if (dev->flags != 0)
	{
		int ct=0;
		dev->flags = 0;
		/*
		 *	Call the device specific close. This cannot fail.
		 */
		if (dev->stop)
			dev->stop(dev);

		notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
#if 0
		/*
		 *	Delete the route to the device.
		 *	(Compiled out: the NETDEV_DOWN notification above is
		 *	presumably what triggers this cleanup now.)
		 */
#ifdef CONFIG_INET
		ip_rt_flush(dev);
		arp_device_down(dev);
#endif
#ifdef CONFIG_IPX
		ipxrtr_device_down(dev);
#endif
#endif
		/*
		 *	Flush the multicast chain
		 */
		dev_mc_discard(dev);
		/*
		 *	Blank the IP addresses
		 */
		dev->pa_addr = 0;
		dev->pa_dstaddr = 0;
		dev->pa_brdaddr = 0;
		dev->pa_mask = 0;
		/*
		 *	Purge any queued packets when we down the link.
		 *	Every buffer is dequeued, but only those marked
		 *	skb->free are actually released here.
		 */
		while(ct<DEV_NUMBUFFS)
		{
			struct sk_buff *skb;
			while((skb=skb_dequeue(&dev->buffs[ct]))!=NULL)
				if(skb->free)
					kfree_skb(skb,FREE_WRITE);
			ct++;
		}
	}
	return(0);
}


/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world. Both are thin wrappers that apply
 *	the generic notifier-chain call to netdev_chain and pass its return
 *	value through.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	return notifier_chain_register(&netdev_chain, nb);
}

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	return notifier_chain_unregister(&netdev_chain,nb);
}



/*
 *	Send (or queue for sending) a packet.
 *
 *	IMPORTANT: When this is called to resend frames. The caller MUST
 *	already have locked the sk_buff. Apart from that we do the
 *	rest of the magic.
 *
 *	skb - frame to send.
 *	dev - device to send it on; a NULL dev is logged and ignored.
 *	pri - queue index in dev->buffs. A negative value -(i+1) means
 *	      "retry of a frame taken from the head of queue i" (this is
 *	      how dev_tint() calls us): such a frame is not queued at the
 *	      tail first and is not copied to ETH_P_ALL handlers again.
 *
 *	Returns nothing. On driver failure the frame is put back at the
 *	head of its queue.
 */

void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri)
{
	unsigned long flags;
	int nitcount;
	struct packet_type *ptype;
	int where = 0;		/* used to say if the packet should go	*/
				/* at the front or the back of the	*/
				/* queue - front is a retransmit try	*/

	if (dev == NULL)
	{
		printk("dev.c: dev_queue_xmit: dev = NULL\n");
		return;
	}

	if(pri>=0 && !skb_device_locked(skb))
		skb_device_lock(skb);	/* Shove a lock on the frame */
#ifdef CONFIG_SLAVE_BALANCING
	/* Divert to the slave device when it is up and has the shorter queue */
	save_flags(flags);
	cli();
	if(dev->slave!=NULL && dev->slave->pkt_queue < dev->pkt_queue &&
				(dev->slave->flags & IFF_UP))
		dev=dev->slave;
	restore_flags(flags);
#endif
#ifdef CONFIG_SKB_CHECK
	IS_SKB(skb);
#endif
	skb->dev = dev;

	/*
	 *	This just eliminates some race conditions, but not all...
	 *	A non-NULL skb->next means the buffer is still linked on a queue.
	 */

	if (skb->next != NULL)
	{
		/*
		 *	Make sure we haven't missed an interrupt.
		 */
		printk("dev_queue_xmit: worked around a missed interrupt\n");
		dev->hard_start_xmit(NULL, dev);
		return;
	}

	/*
	 *	Negative priority is used to flag a frame that is being pulled from the
	 *	queue front as a retransmit attempt. It therefore goes back on the queue
	 *	start on a failure.
	 */

	if (pri < 0)
	{
		pri = -pri-1;
		where = 1;
	}

	/* Out-of-range priorities are logged and forced to queue 1 */
	if (pri >= DEV_NUMBUFFS)
	{
		printk("bad priority in dev_queue_xmit.\n");
		pri = 1;
	}

	/*
	 *	If the address has not been resolved. Call the device header rebuilder.
	 *	This can cover all protocols and technically not just ARP either.
	 *	A non-zero return means the frame cannot go out yet; we simply
	 *	return (presumably the resolver has taken charge of the skb --
	 *	compare arp_find() queueing it on the ARP entry).
	 */

	if (!skb->arp && dev->rebuild_header(skb->data, dev, skb->raddr, skb)) {
		return;
	}

	/*
	 *	For a fresh frame: append it to the queue and then take whatever
	 *	is at the head, so frames already waiting go out first. From here
	 *	on 'skb' may be a different buffer from the one passed in.
	 */
	save_flags(flags);
	cli();
	if (!where) {
#ifdef CONFIG_SLAVE_BALANCING
		skb->in_dev_queue=1;
#endif
		skb_queue_tail(dev->buffs + pri,skb);
		skb_device_unlock(skb);		/* Buffer is on the device queue and can be freed safely */
		skb = skb_dequeue(dev->buffs + pri);
		skb_device_lock(skb);		/* New buffer needs locking down */
#ifdef CONFIG_SLAVE_BALANCING
		skb->in_dev_queue=0;
#endif
	}
	restore_flags(flags);

	/* copy outgoing packets to any sniffer packet handlers */
	if(!where)
	{
		/* nitcount lets the scan stop once all dev_nit ETH_P_ALL handlers are served */
		for (nitcount= dev_nit, ptype = ptype_base; nitcount > 0 && ptype != NULL; ptype = ptype->next)
		{
			/* Never send packets back to the socket
			 * they originated from - MvS (miquels@drinkel.ow.org)
			 */
			if (ptype->type == htons(ETH_P_ALL) &&
			   (ptype->dev == dev || !ptype->dev) &&
			   ((struct sock *)ptype->data != skb->sk))
			{
				struct sk_buff *skb2;
				/* Out of memory: give up on the remaining sniffers */
				if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
					break;
				/*
				 *	The protocol knows this has (for other paths) been taken off
				 *	and adds it back.
				 */
				skb2->len-=skb->dev->hard_header_len;
				ptype->func(skb2, skb->dev, ptype);
				nitcount--;
			}
		}
	}
	if (dev->hard_start_xmit(skb, dev) == 0) {
		/*
		 *	Packet is now solely the responsibility of the driver
		 */
		return;
	}

	/*
	 *	Transmission failed, put skb back into a list. Once on the list it's safe and
	 *	no longer device locked (it can be freed safely from the device queue)
	 *	restore_flags() below reuses the value saved by the save_flags() above.
	 */
	cli();
#ifdef CONFIG_SLAVE_BALANCING
	skb->in_dev_queue=1;
	dev->pkt_queue++;
#endif
	skb_device_unlock(skb);
	skb_queue_head(dev->buffs + pri,skb);
	restore_flags(flags);
}

/*
 *	Receive a packet from a device driver and queue it for the upper
 *	(protocol) levels.  It always succeeds. This is the recommended
 *	interface to use.
 *
 *	"Succeeds" means the caller never has to act on a result: when the
 *	backlog is congested the frame is silently freed instead of queued.
 *	Otherwise the frame is appended to 'backlog' and the NET_BH bottom
 *	half is marked to run.
 */

void netif_rx(struct sk_buff *skb)
{
	static int dropping = 0;	/* sticky congestion state, see below */

	/*
	 *	Any received buffers are un-owned and should be discarded
	 *	when freed. These will be updated later as the frames get
	 *	owners.
	 */
	skb->sk = NULL;
	skb->free = 1;
	/* Timestamp the frame unless the driver already did */
	if(skb->stamp.tv_sec==0)
		skb->stamp = xtime;

	/*
	 *	Check that we aren't overdoing things.
	 *	Hysteresis: start dropping above 300 queued frames and keep
	 *	dropping until the backlog has drained completely.
	 */

	if (!backlog_size)
		dropping = 0;
	else if (backlog_size > 300)
		dropping = 1;

	if (dropping)
	{
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 *	Add it to the "backlog" queue.
	 */
#ifdef CONFIG_SKB_CHECK
	IS_SKB(skb);
#endif
	skb_queue_tail(&backlog,skb);
	backlog_size++;

	/*
	 *	If any packet arrived, mark it for processing after the
	 *	hardware interrupt returns.
	 */

	mark_bh(NET_BH);
	return;
}


/*
 *	The old interface to fetch a packet from a device driver.
 *	This function is the base level entry point for all drivers that
 *	want to send a packet to the upper (protocol) levels.  It takes
 *	care of de-multiplexing the packet to the various modules based
 *	on their protocol ID.
 *
 *	Return values:	1 <- exit I can't do any more
 *			0 <- feed me more (i.e. "done", "OK").
+ * + * This function is OBSOLETE and should not be used by any new + * device. + */ + +int dev_rint(unsigned char *buff, long len, int flags, struct device *dev) +{ + static int dropping = 0; + struct sk_buff *skb = NULL; + unsigned char *to; + int amount, left; + int len2; + + if (dev == NULL || buff == NULL || len <= 0) + return(1); + + if (flags & IN_SKBUFF) + { + skb = (struct sk_buff *) buff; + } + else + { + if (dropping) + { + if (skb_peek(&backlog) != NULL) + return(1); + printk("INET: dev_rint: no longer dropping packets.\n"); + dropping = 0; + } + + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + { + printk("dev_rint: packet dropped on %s (no memory) !\n", + dev->name); + dropping = 1; + return(1); + } + + /* + * First we copy the packet into a buffer, and save it for later. We + * in effect handle the incoming data as if it were from a circular buffer + */ + + to = skb->data; + left = len; + + len2 = len; + while (len2 > 0) + { + amount = min(len2, (unsigned long) dev->rmem_end - + (unsigned long) buff); + memcpy(to, buff, amount); + len2 -= amount; + left -= amount; + buff += amount; + to += amount; + if ((unsigned long) buff == dev->rmem_end) + buff = (unsigned char *) dev->rmem_start; + } + } + + /* + * Tag the frame and kick it to the proper receive routine + */ + + skb->len = len; + skb->dev = dev; + skb->free = 1; + + netif_rx(skb); + /* + * OK, all done. + */ + return(0); +} + + +/* + * This routine causes all interfaces to try to send some data. 
+ */ + +void dev_transmit(void) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (dev->flags != 0 && !dev->tbusy) { + /* + * Kick the device + */ + dev_tint(dev); + } + } +} + + +/********************************************************************************** + + Receive Queue Processor + +***********************************************************************************/ + +/* + * This is a single non-reentrant routine which takes the received packet + * queue and throws it at the networking layers in the hope that something + * useful will emerge. + */ + +volatile char in_bh = 0; /* Non-reentrant remember */ + +int in_net_bh() /* Used by timer.c */ +{ + return(in_bh==0?0:1); +} + +/* + * When we are called the queue is ready to grab, the interrupts are + * on and hardware can interrupt and queue to the receive queue a we + * run with no problems. + * This is run as a bottom half after an interrupt handler that does + * mark_bh(NET_BH); + */ + +void net_bh(void *tmp) +{ + struct sk_buff *skb; + struct packet_type *ptype; + struct packet_type *pt_prev; + unsigned short type; + + /* + * Atomically check and mark our BUSY state. + */ + + if (set_bit(1, (void*)&in_bh)) + return; + + /* + * Can we send anything now? We want to clear the + * decks for any more sends that get done as we + * process the input. + */ + + dev_transmit(); + + /* + * Any data left to process. This may occur because a + * mark_bh() is done after we empty the queue including + * that from the device which does a mark_bh() just after + */ + + cli(); + + /* + * While the queue is not empty + */ + + while((skb=skb_dequeue(&backlog))!=NULL) + { + /* + * We have a packet. Therefore the queue has shrunk + */ + backlog_size--; + + sti(); + + /* + * Bump the pointer to the next structure. + * This assumes that the basic 'skb' pointer points to + * the MAC header, if any (as indicated by its "length" + * field). Take care now! 
+ */ + + skb->h.raw = skb->data + skb->dev->hard_header_len; + skb->len -= skb->dev->hard_header_len; + + /* + * Fetch the packet protocol ID. This is also quite ugly, as + * it depends on the protocol driver (the interface itself) to + * know what the type is, or where to get it from. The Ethernet + * interfaces fetch the ID from the two bytes in the Ethernet MAC + * header (the h_proto field in struct ethhdr), but other drivers + * may either use the ethernet ID's or extra ones that do not + * clash (eg ETH_P_AX25). We could set this before we queue the + * frame. In fact I may change this when I have time. + */ + + type = skb->dev->type_trans(skb, skb->dev); + + /* + * We got a packet ID. Now loop over the "known protocols" + * table (which is actually a linked list, but this will + * change soon if I get my way- FvK), and forward the packet + * to anyone who wants it. + * + * [FvK didn't get his way but he is right this ought to be + * hashed so we typically get a single hit. The speed cost + * here is minimal but no doubt adds up at the 4,000+ pkts/second + * rate we can hit flat out] + */ + pt_prev = NULL; + for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) + { + if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev)) + { + /* + * We already have a match queued. Deliver + * to it and then remember the new match + */ + if(pt_prev) + { + struct sk_buff *skb2; + + skb2=skb_clone(skb, GFP_ATOMIC); + + /* + * Kick the protocol handler. This should be fast + * and efficient code. + */ + + if(skb2) + pt_prev->func(skb2, skb->dev, pt_prev); + } + /* Remember the current last to do */ + pt_prev=ptype; + } + } /* End of protocol list loop */ + + /* + * Is there a last item to send to ? + */ + + if(pt_prev) + pt_prev->func(skb, skb->dev, pt_prev); + /* + * Has an unknown packet has been received ? + */ + + else + kfree_skb(skb, FREE_WRITE); + + /* + * Again, see if we can transmit anything now. 
+ * [Ought to take this out judging by tests it slows + * us down not speeds us up] + */ + + dev_transmit(); + cli(); + } /* End of queue loop */ + + /* + * We have emptied the queue + */ + + in_bh = 0; + sti(); + + /* + * One last output flush. + */ + + dev_transmit(); +} + + +/* + * This routine is called when an device driver (i.e. an + * interface) is ready to transmit a packet. + */ + +void dev_tint(struct device *dev) +{ + int i; + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + /* + * Work the queues in priority order + */ + + for(i = 0;i < DEV_NUMBUFFS; i++) + { + /* + * Pull packets from the queue + */ + + + cli(); + while((skb=skb_dequeue(&dev->buffs[i]))!=NULL) + { + /* + * Stop anyone freeing the buffer while we retransmit it + */ + skb_device_lock(skb); + restore_flags(flags); + /* + * Feed them to the output stage and if it fails + * indicate they re-queue at the front. + */ + dev_queue_xmit(skb,dev,-i - 1); + /* + * If we can take no more then stop here. + */ + if (dev->tbusy) + return; + cli(); + } + } + restore_flags(flags); +} + + +/* + * Perform a SIOCGIFCONF call. This structure will change + * size shortly, and there is nothing I can do about it. + * Thus we will need a 'compatibility mode'. + */ + +static int dev_ifconf(char *arg) +{ + struct ifconf ifc; + struct ifreq ifr; + struct device *dev; + char *pos; + int len; + int err; + + /* + * Fetch the caller's info block. + */ + + err=verify_area(VERIFY_WRITE, arg, sizeof(struct ifconf)); + if(err) + return err; + memcpy_fromfs(&ifc, arg, sizeof(struct ifconf)); + len = ifc.ifc_len; + pos = ifc.ifc_buf; + + /* + * We now walk the device list filling each active device + * into the array. + */ + + err=verify_area(VERIFY_WRITE,pos,len); + if(err) + return err; + + /* + * Loop over the interfaces, and write an info block for each. 
+ */ + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if(!(dev->flags & IFF_UP)) /* Downed devices don't count */ + continue; + memset(&ifr, 0, sizeof(struct ifreq)); + strcpy(ifr.ifr_name, dev->name); + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = dev->family; + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = dev->pa_addr; + + /* + * Write this block to the caller's space. + */ + + memcpy_tofs(pos, &ifr, sizeof(struct ifreq)); + pos += sizeof(struct ifreq); + len -= sizeof(struct ifreq); + + /* + * Have we run out of space here ? + */ + + if (len < sizeof(struct ifreq)) + break; + } + + /* + * All done. Write the updated control block back to the caller. + */ + + ifc.ifc_len = (pos - ifc.ifc_buf); + ifc.ifc_req = (struct ifreq *) ifc.ifc_buf; + memcpy_tofs(arg, &ifc, sizeof(struct ifconf)); + + /* + * Report how much was filled in + */ + + return(pos - arg); +} + + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ + +static int sprintf_stats(char *buffer, struct device *dev) +{ + struct enet_statistics *stats = (dev->get_stats ? dev->get_stats(dev): NULL); + int size; + + if (stats) + size = sprintf(buffer, "%6s:%7d %4d %4d %4d %4d %8d %4d %4d %4d %5d %4d\n", + dev->name, + stats->rx_packets, stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->tx_packets, stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + stats->tx_aborted_errors + + stats->tx_window_errors + stats->tx_heartbeat_errors); + else + size = sprintf(buffer, "%6s: No statistics available.\n", dev->name); + + return size; +} + +/* + * Called from the PROCfs module. 
This now uses the new arbitrary sized /proc/net interface + * to create /proc/net/dev + */ + +int dev_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len=0; + off_t begin=0; + off_t pos=0; + int size; + + struct device *dev; + + + size = sprintf(buffer, "Inter-| Receive | Transmit\n" + " face |packets errs drop fifo frame|packets errs drop fifo colls carrier\n"); + + pos+=size; + len+=size; + + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + size = sprintf_stats(buffer+len, dev); + len+=size; + pos=begin+len; + + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Start slop */ + if(len>length) + len=length; /* Ending slop */ + return len; +} + + +/* + * This checks bitmasks for the ioctl calls for devices. + */ + +static inline int bad_mask(unsigned long mask, unsigned long addr) +{ + if (addr & (mask = ~mask)) + return 1; + mask = ntohl(mask); + if (mask & (mask+1)) + return 1; + return 0; +} + +#ifndef _HURD_ +/* + * Perform the SIOCxIFxxx calls. + * + * The socket layer has seen an ioctl the address family thinks is + * for the device. At this point we get invoked to make a decision + */ + +static int dev_ifsioc(void *arg, unsigned int getset) +{ + struct ifreq ifr; + struct device *dev; + int ret; + + /* + * Fetch the caller's info block into kernel space + */ + + int err=verify_area(VERIFY_WRITE, arg, sizeof(struct ifreq)); + if(err) + return err; + + memcpy_fromfs(&ifr, arg, sizeof(struct ifreq)); + + /* + * See which interface the caller is talking about. 
+ */ + + if ((dev = dev_get(ifr.ifr_name)) == NULL) + return(-ENODEV); + + switch(getset) + { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr.ifr_flags = dev->flags; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + case SIOCSIFFLAGS: /* Set interface flags */ + { + int old_flags = dev->flags; +#ifdef CONFIG_SLAVE_BALANCING + if(dev->flags&IFF_SLAVE) + return -EBUSY; +#endif + dev->flags = ifr.ifr_flags & ( + IFF_UP | IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | + IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | + IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER + | IFF_MULTICAST); +#ifdef CONFIG_SLAVE_BALANCING + if(!(dev->flags&IFF_MASTER) && dev->slave) + { + dev->slave->flags&=~IFF_SLAVE; + dev->slave=NULL; + } +#endif + /* + * Load in the correct multicast list now the flags have changed. + */ + + dev_mc_upload(dev); +#if 0 + if( dev->set_multicast_list!=NULL) + { + + /* + * Has promiscuous mode been turned off + */ + + if ( (old_flags & IFF_PROMISC) && ((dev->flags & IFF_PROMISC) == 0)) + dev->set_multicast_list(dev,0,NULL); + + /* + * Has it been turned on + */ + + if ( (dev->flags & IFF_PROMISC) && ((old_flags & IFF_PROMISC) == 0)) + dev->set_multicast_list(dev,-1,NULL); + } +#endif + /* + * Have we downed the interface + */ + + if ((old_flags & IFF_UP) && ((dev->flags & IFF_UP) == 0)) + { + ret = dev_close(dev); + } + else + { + /* + * Have we upped the interface + */ + + ret = (! (old_flags & IFF_UP) && (dev->flags & IFF_UP)) + ? dev_open(dev) : 0; + /* + * Check the flags. 
+ */ + if(ret<0) + dev->flags&=~IFF_UP; /* Didn't open so down the if */ + } + } + break; + + case SIOCGIFADDR: /* Get interface address (and family) */ + (*(struct sockaddr_in *) + &ifr.ifr_addr).sin_addr.s_addr = dev->pa_addr; + (*(struct sockaddr_in *) + &ifr.ifr_addr).sin_family = dev->family; + (*(struct sockaddr_in *) + &ifr.ifr_addr).sin_port = 0; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFADDR: /* Set interface address (and family) */ + dev->pa_addr = (*(struct sockaddr_in *) + &ifr.ifr_addr).sin_addr.s_addr; + dev->family = ifr.ifr_addr.sa_family; + +#ifdef CONFIG_INET + /* This is naughty. When net-032e comes out It wants moving into the net032 + code not the kernel. Till then it can sit here (SIGH) */ + dev->pa_mask = ip_get_mask(dev->pa_addr); +#endif + dev->pa_brdaddr = dev->pa_addr | ~dev->pa_mask; + ret = 0; + break; + + case SIOCGIFBRDADDR: /* Get the broadcast address */ + (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_addr.s_addr = dev->pa_brdaddr; + (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_family = dev->family; + (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_port = 0; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFBRDADDR: /* Set the broadcast address */ + dev->pa_brdaddr = (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_addr.s_addr; + ret = 0; + break; + + case SIOCGIFDSTADDR: /* Get the destination address (for point-to-point links) */ + (*(struct sockaddr_in *) + &ifr.ifr_dstaddr).sin_addr.s_addr = dev->pa_dstaddr; + (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_family = dev->family; + (*(struct sockaddr_in *) + &ifr.ifr_broadaddr).sin_port = 0; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFDSTADDR: /* Set the destination address (for point-to-point links) */ + dev->pa_dstaddr = (*(struct sockaddr_in *) + &ifr.ifr_dstaddr).sin_addr.s_addr; + ret = 0; + break; + + case SIOCGIFNETMASK: /* Get the netmask 
for the interface */ + (*(struct sockaddr_in *) + &ifr.ifr_netmask).sin_addr.s_addr = dev->pa_mask; + (*(struct sockaddr_in *) + &ifr.ifr_netmask).sin_family = dev->family; + (*(struct sockaddr_in *) + &ifr.ifr_netmask).sin_port = 0; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + { + unsigned long mask = (*(struct sockaddr_in *) + &ifr.ifr_netmask).sin_addr.s_addr; + ret = -EINVAL; + /* + * The mask we set must be legal. + */ + if (bad_mask(mask,0)) + break; + dev->pa_mask = mask; + ret = 0; + } + break; + + case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ + + ifr.ifr_metric = dev->metric; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ + dev->metric = ifr.ifr_metric; + ret = 0; + break; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr.ifr_mtu = dev->mtu; + memcpy_tofs(arg, &ifr, sizeof(struct ifreq)); + ret = 0; + break; + + case SIOCSIFMTU: /* Set the MTU of a device */ + + /* + * MTU must be positive and under the page size problem + */ + + if(ifr.ifr_mtu<1 || ifr.ifr_mtu>3800) + return -EINVAL; + dev->mtu = ifr.ifr_mtu; + ret = 0; + break; + + case SIOCGIFMEM: /* Get the per device memory space. We can add this but currently + do not support it */ + printk("NET: ioctl(SIOCGIFMEM, %p)\n", arg); + ret = -EINVAL; + break; + + case SIOCSIFMEM: /* Set the per device memory buffer space. Not applicable in our case */ + printk("NET: ioctl(SIOCSIFMEM, %p)\n", arg); + ret = -EINVAL; + break; + + case OLD_SIOCGIFHWADDR: /* Get the hardware address. 
This will change and SIFHWADDR will be added */ + memcpy(ifr.old_ifr_hwaddr,dev->dev_addr, MAX_ADDR_LEN); + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + ret=0; + break; + + case SIOCGIFHWADDR: + memcpy(ifr.ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); + ifr.ifr_hwaddr.sa_family=dev->type; + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + ret=0; + break; + + case SIOCSIFHWADDR: + if(dev->set_mac_address==NULL) + return -EOPNOTSUPP; + if(ifr.ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + ret=dev->set_mac_address(dev,ifr.ifr_hwaddr.sa_data); + break; + + case SIOCGIFMAP: + ifr.ifr_map.mem_start=dev->mem_start; + ifr.ifr_map.mem_end=dev->mem_end; + ifr.ifr_map.base_addr=dev->base_addr; + ifr.ifr_map.irq=dev->irq; + ifr.ifr_map.dma=dev->dma; + ifr.ifr_map.port=dev->if_port; + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + ret=0; + break; + + case SIOCSIFMAP: + if(dev->set_config==NULL) + return -EOPNOTSUPP; + return dev->set_config(dev,&ifr.ifr_map); + + case SIOCGIFSLAVE: +#ifdef CONFIG_SLAVE_BALANCING + if(dev->slave==NULL) + return -ENOENT; + strncpy(ifr.ifr_name,dev->name,sizeof(ifr.ifr_name)); + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + ret=0; +#else + return -ENOENT; +#endif + break; +#ifdef CONFIG_SLAVE_BALANCING + case SIOCSIFSLAVE: + { + + /* + * Fun game. Get the device up and the flags right without + * letting some scummy user confuse us. 
+ */ + unsigned long flags; + struct device *slave=dev_get(ifr.ifr_slave); + save_flags(flags); + if(slave==NULL) + { + return -ENODEV; + } + cli(); + if((slave->flags&(IFF_UP|IFF_RUNNING))!=(IFF_UP|IFF_RUNNING)) + { + restore_flags(flags); + return -EINVAL; + } + if(dev->flags&IFF_SLAVE) + { + restore_flags(flags); + return -EBUSY; + } + if(dev->slave!=NULL) + { + restore_flags(flags); + return -EBUSY; + } + if(slave->flags&IFF_SLAVE) + { + restore_flags(flags); + return -EBUSY; + } + dev->slave=slave; + slave->flags|=IFF_SLAVE; + dev->flags|=IFF_MASTER; + restore_flags(flags); + ret=0; + } + break; +#endif + + case SIOCADDMULTI: + if(dev->set_multicast_list==NULL) + return -EINVAL; + if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_add(dev,ifr.ifr_hwaddr.sa_data, dev->addr_len, 1); + return 0; + + case SIOCDELMULTI: + if(dev->set_multicast_list==NULL) + return -EINVAL; + if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + dev_mc_delete(dev,ifr.ifr_hwaddr.sa_data,dev->addr_len, 1); + return 0; + /* + * Unknown or private ioctl + */ + + default: + if((getset >= SIOCDEVPRIVATE) && + (getset <= (SIOCDEVPRIVATE + 15))) { + if(dev->do_ioctl==NULL) + return -EOPNOTSUPP; + ret=dev->do_ioctl(dev, &ifr, getset); + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + break; + } + + ret = -EINVAL; + } + return(ret); +} + + +/* + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. + */ + +int dev_ioctl(unsigned int cmd, void *arg) +{ + switch(cmd) + { + case SIOCGIFCONF: + (void) dev_ifconf((char *) arg); + return 0; + + /* + * Ioctl calls that can be done by all. 
+ */ + + case SIOCGIFFLAGS: + case SIOCGIFADDR: + case SIOCGIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case OLD_SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + return dev_ifsioc(arg, cmd); + + /* + * Ioctl calls requiring the power of a superuser + */ + + case SIOCSIFFLAGS: + case SIOCSIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFBRDADDR: + case SIOCSIFNETMASK: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFMEM: + case SIOCSIFMAP: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + if (!suser()) + return -EPERM; + return dev_ifsioc(arg, cmd); + + case SIOCSIFLINK: + return -EINVAL; + + /* + * Unknown or private ioctl. + */ + + default: + if((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) { + return dev_ifsioc(arg, cmd); + } + return -EINVAL; + } +} +#endif + + +/* + * Initialize the DEV module. At boot time this walks the device list and + * unhooks any devices that fail to initialise (normally hardware not + * present) and leaves us with a valid list of present and active devices. + * + * The PCMCIA code may need to change this a little, and add a pair + * of register_inet_device() unregister_inet_device() calls. This will be + * needed for ethernet as modules support. + */ + +void dev_init(void) +{ + struct device *dev, *dev2; + + /* + * Add the devices. + * If the call to dev->init fails, the dev is removed + * from the chain disconnecting the device until the + * next reboot. + */ + + dev2 = NULL; + for (dev = dev_base; dev != NULL; dev=dev->next) + { + if (dev->init && dev->init(dev)) + { + /* + * It failed to come up. Unhook it. 
+ */ + + if (dev2 == NULL) + dev_base = dev->next; + else + dev2->next = dev->next; + } + else + { + dev2 = dev; + } + } +} diff --git a/pfinet/linux-inet/dev_mcast.c b/pfinet/linux-inet/dev_mcast.c new file mode 100644 index 00000000..cd5e356e --- /dev/null +++ b/pfinet/linux-inet/dev_mcast.c @@ -0,0 +1,169 @@ +/* + * Linux NET3: Multicast List maintenance. + * + * Authors: + * Tim Kordas <tjk@nostromo.eeap.cwru.edu> + * Richard Underwood <richard@wuzz.demon.co.uk> + * + * Stir fried together from the IP multicast and CAP patches above + * Alan Cox <Alan.Cox@linux.org> + * + * Fixes: + * Alan Cox : Update the device on a real delete + * rather than any time but... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include "ip.h" +#include "route.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "arp.h" + + +/* + * Device multicast list maintenance. This knows about such little matters as promiscuous mode and + * converting from the list to the array the drivers use. At least until I fix the drivers up. + * + * This is used both by IP and by the user level maintenance functions. Unlike BSD we maintain a usage count + * on a given multicast address so that a casual user application can add/delete multicasts used by protocols + * without doing damage to the protocols when it deletes the entries. 
It also helps IP as it tracks overlapping + * maps. + */ + + +/* + * Update the multicast list into the physical NIC controller. + */ + +void dev_mc_upload(struct device *dev) +{ + struct dev_mc_list *dmi; + char *data, *tmp; + + /* Don't do anything till we up the interface + [dev_open will call this function so the list will + stay sane] */ + + if(!(dev->flags&IFF_UP)) + return; + + + /* Devices with no set multicast don't get set */ + if(dev->set_multicast_list==NULL) + return; + /* Promiscuous is promiscuous - so no filter needed */ + if(dev->flags&IFF_PROMISC) + { + dev->set_multicast_list(dev, -1, NULL); + return; + } + + if(dev->mc_count==0) + { + dev->set_multicast_list(dev,0,NULL); + return; + } + + data=kmalloc(dev->mc_count*dev->addr_len, GFP_KERNEL); + if(data==NULL) + { + printk("Unable to get memory to set multicast list on %s\n",dev->name); + return; + } + for(tmp = data, dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) + { + memcpy(tmp,dmi->dmi_addr, dmi->dmi_addrlen); + tmp+=dev->addr_len; + } + dev->set_multicast_list(dev,dev->mc_count,data); + kfree(data); +} + +/* + * Delete a device level multicast + */ + +void dev_mc_delete(struct device *dev, void *addr, int alen, int all) +{ + struct dev_mc_list **dmi; + for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next) + { + if(memcmp((*dmi)->dmi_addr,addr,(*dmi)->dmi_addrlen)==0 && alen==(*dmi)->dmi_addrlen) + { + struct dev_mc_list *tmp= *dmi; + if(--(*dmi)->dmi_users && !all) + return; + *dmi=(*dmi)->next; + dev->mc_count--; + kfree_s(tmp,sizeof(*tmp)); + dev_mc_upload(dev); + return; + } + } +} + +/* + * Add a device level multicast + */ + +void dev_mc_add(struct device *dev, void *addr, int alen, int newonly) +{ + struct dev_mc_list *dmi; + for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) + { + if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) + { + if(!newonly) + dmi->dmi_users++; + return; + } + } + dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi),GFP_KERNEL); + if(dmi==NULL) + 
return; /* GFP_KERNEL so can't happen anyway */ + memcpy(dmi->dmi_addr, addr, alen); + dmi->dmi_addrlen=alen; + dmi->next=dev->mc_list; + dmi->dmi_users=1; + dev->mc_list=dmi; + dev->mc_count++; + dev_mc_upload(dev); +} + +/* + * Discard multicast list when a device is downed + */ + +void dev_mc_discard(struct device *dev) +{ + while(dev->mc_list!=NULL) + { + struct dev_mc_list *tmp=dev->mc_list; + dev->mc_list=dev->mc_list->next; + kfree_s(tmp,sizeof(*tmp)); + } + dev->mc_count=0; +} + diff --git a/pfinet/linux-inet/devinet.c b/pfinet/linux-inet/devinet.c new file mode 100644 index 00000000..946536be --- /dev/null +++ b/pfinet/linux-inet/devinet.c @@ -0,0 +1,213 @@ +/* + * NET3 IP device support routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Derived from the IP parts of dev.c 1.0.19 + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * + * Additional Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/if_ether.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include "ip.h" +#include "route.h" +#include "protocol.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "arp.h" + +/* + * Determine a default network mask, based on the IP address. 
+ */ + +unsigned long ip_get_mask(unsigned long addr) +{ + unsigned long dst; + + if (addr == 0L) + return(0L); /* special case */ + + dst = ntohl(addr); + if (IN_CLASSA(dst)) + return(htonl(IN_CLASSA_NET)); + if (IN_CLASSB(dst)) + return(htonl(IN_CLASSB_NET)); + if (IN_CLASSC(dst)) + return(htonl(IN_CLASSC_NET)); + + /* + * Something else, probably a multicast. + */ + + return(0); +} + +/* + * Check the address for our address, broadcasts, etc. + * + * I intend to fix this to at the very least cache the last + * resolved entry. + */ + +int ip_chk_addr(unsigned long addr) +{ + struct device *dev; + unsigned long mask; + + /* + * Accept both `all ones' and `all zeros' as BROADCAST. + * (Support old BSD in other words). This old BSD + * support will go very soon as it messes other things + * up. + * Also accept `loopback broadcast' as BROADCAST. + */ + + if (addr == INADDR_ANY || addr == INADDR_BROADCAST || + addr == htonl(0x7FFFFFFFL)) + return IS_BROADCAST; + + mask = ip_get_mask(addr); + + /* + * Accept all of the `loopback' class A net. + */ + + if ((addr & mask) == htonl(0x7F000000L)) + return IS_MYADDR; + + /* + * OK, now check the interface addresses. + */ + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (!(dev->flags & IFF_UP)) + continue; + /* + * If the protocol address of the device is 0 this is special + * and means we are address hunting (eg bootp). + */ + + if ((dev->pa_addr == 0)/* || (dev->flags&IFF_PROMISC)*/) + return IS_MYADDR; + /* + * Is it the exact IP address? + */ + + if (addr == dev->pa_addr) + return IS_MYADDR; + /* + * Is it our broadcast address? + */ + + if ((dev->flags & IFF_BROADCAST) && addr == dev->pa_brdaddr) + return IS_BROADCAST; + /* + * Nope. Check for a subnetwork broadcast. + */ + + if (((addr ^ dev->pa_addr) & dev->pa_mask) == 0) + { + if ((addr & ~dev->pa_mask) == 0) + return IS_BROADCAST; + if ((addr & ~dev->pa_mask) == ~dev->pa_mask) + return IS_BROADCAST; + } + + /* + * Nope. Check for Network broadcast. 
+ */ + + if (((addr ^ dev->pa_addr) & mask) == 0) + { + if ((addr & ~mask) == 0) + return IS_BROADCAST; + if ((addr & ~mask) == ~mask) + return IS_BROADCAST; + } + } + if(IN_MULTICAST(ntohl(addr))) + return IS_MULTICAST; + return 0; /* no match at all */ +} + + +/* + * Retrieve our own address. + * + * Because the loopback address (127.0.0.1) is already recognized + * automatically, we can use the loopback interface's address as + * our "primary" interface. This is the address used by IP et + * al when it doesn't know which address to use (i.e. it does not + * yet know from or to which interface to go...). + */ + +unsigned long ip_my_addr(void) +{ + struct device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) + { + if (dev->flags & IFF_LOOPBACK) + return(dev->pa_addr); + } + return(0); +} + +/* + * Find an interface that can handle addresses for a certain address. + * + * This needs optimising, since it's relatively trivial to collapse + * the two loops into one. + */ + +struct device * ip_dev_check(unsigned long addr) +{ + struct device *dev; + + for (dev = dev_base; dev; dev = dev->next) + { + if (!(dev->flags & IFF_UP)) + continue; + if (!(dev->flags & IFF_POINTOPOINT)) + continue; + if (addr != dev->pa_dstaddr) + continue; + return dev; + } + for (dev = dev_base; dev; dev = dev->next) + { + if (!(dev->flags & IFF_UP)) + continue; + if (dev->flags & IFF_POINTOPOINT) + continue; + if (dev->pa_mask & (addr ^ dev->pa_addr)) + continue; + return dev; + } + return NULL; +} diff --git a/pfinet/linux-inet/eth.c b/pfinet/linux-inet/eth.c new file mode 100644 index 00000000..cbd2c94b --- /dev/null +++ b/pfinet/linux-inet/eth.c @@ -0,0 +1,196 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Ethernet-type device handling. 
+ * + * Version: @(#)eth.c 1.0.7 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * + * Fixes: + * Mr Linux : Arp problems + * Alan Cox : Generic queue tidyup (very tiny here) + * Alan Cox : eth_header ntohs should be htons + * Alan Cox : eth_rebuild_header missing an htons and + * minor other things. + * Tegge : Arp bug fixes. + * Florian : Removed many unnecessary functions, code cleanup + * and changes for new arp and skbuff. + * Alan Cox : Redid header building to reflect new format. + * Alan Cox : ARP only when compiled with CONFIG_INET + * Greg Page : 802.2 and SNAP stuff + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include "arp.h" + +void eth_setup(char *str, int *ints) +{ + struct device *d = dev_base; + + if (!str || !*str) + return; + while (d) + { + if (!strcmp(str,d->name)) + { + if (ints[0] > 0) + d->irq=ints[1]; + if (ints[0] > 1) + d->base_addr=ints[2]; + if (ints[0] > 2) + d->mem_start=ints[3]; + if (ints[0] > 3) + d->mem_end=ints[4]; + break; + } + d=d->next; + } +} + + +/* + * Create the Ethernet MAC header for an arbitrary protocol layer + * + * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) + */ + +int eth_header(unsigned char *buff, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len, + struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)buff; + + /* + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. + */ + + if(type!=ETH_P_802_3) + eth->h_proto = htons(type); + else + eth->h_proto = htons(len); + + /* + * Set the source hardware address. + */ + + if(saddr) + memcpy(eth->h_source,saddr,dev->addr_len); + else + memcpy(eth->h_source,dev->dev_addr,dev->addr_len); + + /* + * Anyway, the loopback-device should never use this function... + */ + + if (dev->flags & IFF_LOOPBACK) + { + memset(eth->h_dest, 0, dev->addr_len); + return(dev->hard_header_len); + } + + if(daddr) + { + memcpy(eth->h_dest,daddr,dev->addr_len); + return dev->hard_header_len; + } + + return -dev->hard_header_len; +} + + +/* + * Rebuild the Ethernet MAC header. 
This is called after an ARP + * (or in future other address resolution) has completed on this + * sk_buff. We now let ARP fill in the other fields. + */ + +int eth_rebuild_header(void *buff, struct device *dev, unsigned long dst, + struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)buff; + + /* + * Only ARP/IP is currently supported + */ + + if(eth->h_proto != htons(ETH_P_IP)) + { + printk("eth_rebuild_header: Don't know how to resolve type %d addresses?\n",(int)eth->h_proto); + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + return 0; + } + + /* + * Try and get ARP to resolve the header. + */ +#ifdef CONFIG_INET + return arp_find(eth->h_dest, dst, dev, dev->pa_addr, skb)? 1 : 0; +#else + return 0; +#endif +} + + +/* + * Determine the packet's protocol ID. The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. + */ + +unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev) +{ + struct ethhdr *eth = (struct ethhdr *) skb->data; + unsigned char *rawp; + + if(*eth->h_dest&1) + { + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) + skb->pkt_type=PACKET_BROADCAST; + else + skb->pkt_type=PACKET_MULTICAST; + } + + if(dev->flags&IFF_PROMISC) + { + if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + if (ntohs(eth->h_proto) >= 1536) + return eth->h_proto; + + rawp = (unsigned char *)(eth + 1); + + if (*(unsigned short *)rawp == 0xFFFF) + return htons(ETH_P_802_3); + + return htons(ETH_P_802_2); +} diff --git a/pfinet/linux-inet/eth.h b/pfinet/linux-inet/eth.h new file mode 100644 index 00000000..f8fed44e --- /dev/null +++ b/pfinet/linux-inet/eth.h @@ -0,0 +1,35 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NET is implemented using the BSD Socket + * interface as the means of communication with the user level. 
+ * + * Definitions for the Ethernet handlers. + * + * Version: @(#)eth.h 1.0.4 05/13/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _ETH_H +#define _ETH_H + + +#include <linux/if_ether.h> + + +extern char *eth_print(unsigned char *ptr); +extern void eth_dump(struct ethhdr *eth); +extern int eth_header(unsigned char *buff, struct device *dev, + unsigned short type, unsigned long daddr, + unsigned long saddr, unsigned len); +extern int eth_rebuild_header(void *buff, struct device *dev); +extern void eth_add_arp(unsigned long addr, struct sk_buff *skb, + struct device *dev); +extern unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev); + +#endif /* _ETH_H */ diff --git a/pfinet/linux-inet/icmp.c b/pfinet/linux-inet/icmp.c new file mode 100644 index 00000000..c023eab2 --- /dev/null +++ b/pfinet/linux-inet/icmp.c @@ -0,0 +1,774 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Internet Control Message Protocol (ICMP) + * + * Version: @(#)icmp.c 1.0.11 06/02/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Stefan Becker, <stefanb@yello.ping.de> + * + * Fixes: + * Alan Cox : Generic queue usage. 
+ * Gerhard Koerting: ICMP addressing corrected + * Alan Cox : Use tos/ttl settings + * Alan Cox : Protocol violations + * Alan Cox : SNMP Statistics + * Alan Cox : Routing errors + * Alan Cox : Changes for newer routing code + * Alan Cox : Removed old debugging junk + * Alan Cox : Fixed the ICMP error status of net/host unreachable + * Gerhard Koerting : Fixed broadcast ping properly + * Ulrich Kunitz : Fixed ICMP timestamp reply + * A.N.Kuznetsov : Multihoming fixes. + * Laco Rusnak : Multihoming fixes. + * Alan Cox : Tightened up icmp_send(). + * Alan Cox : Multicasts. + * Stefan Becker : ICMP redirects in icmp_send(). + * + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include "snmp.h" +#include "ip.h" +#include "route.h" +#include "protocol.h" +#include "icmp.h" +#include "tcp.h" +#include "snmp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include <linux/errno.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <asm/segment.h> + + +#define min(a,b) ((a)<(b)?(a):(b)) + + +/* + * Statistics + */ + +struct icmp_mib icmp_statistics={0,}; + + +/* An array of errno for error messages from dest unreach. 
*/ +struct icmp_err icmp_err_convert[] = { + { ENETUNREACH, 0 }, /* ICMP_NET_UNREACH */ + { EHOSTUNREACH, 0 }, /* ICMP_HOST_UNREACH */ + { ENOPROTOOPT, 1 }, /* ICMP_PROT_UNREACH */ + { ECONNREFUSED, 1 }, /* ICMP_PORT_UNREACH */ + { EOPNOTSUPP, 0 }, /* ICMP_FRAG_NEEDED */ + { EOPNOTSUPP, 0 }, /* ICMP_SR_FAILED */ + { ENETUNREACH, 1 }, /* ICMP_NET_UNKNOWN */ + { EHOSTDOWN, 1 }, /* ICMP_HOST_UNKNOWN */ + { ENONET, 1 }, /* ICMP_HOST_ISOLATED */ + { ENETUNREACH, 1 }, /* ICMP_NET_ANO */ + { EHOSTUNREACH, 1 }, /* ICMP_HOST_ANO */ + { EOPNOTSUPP, 0 }, /* ICMP_NET_UNR_TOS */ + { EOPNOTSUPP, 0 } /* ICMP_HOST_UNR_TOS */ +}; + + +/* + * Send an ICMP message in response to a situation + * + * Fixme: Fragment handling is wrong really. + */ + +void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info, struct device *dev) +{ + struct sk_buff *skb; + struct iphdr *iph; + int offset; + struct icmphdr *icmph; + int len; + struct device *ndev=NULL; /* Make this =dev to force replies on the same interface */ + unsigned long our_addr; + int atype; + + /* + * Find the original IP header. + */ + + iph = (struct iphdr *) (skb_in->data + dev->hard_header_len); + + /* + * No replies to MAC multicast + */ + + if(skb_in->pkt_type!=PACKET_HOST) + return; + + /* + * No replies to IP multicasting + */ + + atype=ip_chk_addr(iph->daddr); + if(atype==IS_BROADCAST || IN_MULTICAST(iph->daddr)) + return; + + /* + * Only reply to first fragment. + */ + + if(ntohs(iph->frag_off)&IP_OFFSET) + return; + + /* + * We must NEVER NEVER send an ICMP error to an ICMP error message + */ + + if(type==ICMP_DEST_UNREACH||type==ICMP_REDIRECT||type==ICMP_SOURCE_QUENCH||type==ICMP_TIME_EXCEEDED) + { + + /* + * Is the original packet an ICMP packet? + */ + + if(iph->protocol==IPPROTO_ICMP) + { + icmph = (struct icmphdr *) ((char *) iph + + 4 * iph->ihl); + /* + * Check for ICMP error packets (Must never reply to + * an ICMP error). 
+ */ + + if (icmph->type == ICMP_DEST_UNREACH || + icmph->type == ICMP_SOURCE_QUENCH || + icmph->type == ICMP_REDIRECT || + icmph->type == ICMP_TIME_EXCEEDED || + icmph->type == ICMP_PARAMETERPROB) + return; + } + } + icmp_statistics.IcmpOutMsgs++; + + /* + * This needs a tidy. + */ + + switch(type) + { + case ICMP_DEST_UNREACH: + icmp_statistics.IcmpOutDestUnreachs++; + break; + case ICMP_SOURCE_QUENCH: + icmp_statistics.IcmpOutSrcQuenchs++; + break; + case ICMP_REDIRECT: + icmp_statistics.IcmpOutRedirects++; + break; + case ICMP_ECHO: + icmp_statistics.IcmpOutEchos++; + break; + case ICMP_ECHOREPLY: + icmp_statistics.IcmpOutEchoReps++; + break; + case ICMP_TIME_EXCEEDED: + icmp_statistics.IcmpOutTimeExcds++; + break; + case ICMP_PARAMETERPROB: + icmp_statistics.IcmpOutParmProbs++; + break; + case ICMP_TIMESTAMP: + icmp_statistics.IcmpOutTimestamps++; + break; + case ICMP_TIMESTAMPREPLY: + icmp_statistics.IcmpOutTimestampReps++; + break; + case ICMP_ADDRESS: + icmp_statistics.IcmpOutAddrMasks++; + break; + case ICMP_ADDRESSREPLY: + icmp_statistics.IcmpOutAddrMaskReps++; + break; + } + /* + * Get some memory for the reply. + */ + + len = dev->hard_header_len + sizeof(struct iphdr) + sizeof(struct icmphdr) + + sizeof(struct iphdr) + 32; /* amount of header to return */ + + skb = (struct sk_buff *) alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + { + icmp_statistics.IcmpOutErrors++; + return; + } + skb->free = 1; + + /* + * Build Layer 2-3 headers for message back to source. + */ + + our_addr = dev->pa_addr; + if (iph->daddr != our_addr && ip_chk_addr(iph->daddr) == IS_MYADDR) + our_addr = iph->daddr; + offset = ip_build_header(skb, our_addr, iph->saddr, + &ndev, IPPROTO_ICMP, NULL, len, + skb_in->ip_hdr->tos,255); + if (offset < 0) + { + icmp_statistics.IcmpOutErrors++; + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return; + } + + /* + * Re-adjust length according to actual IP header size. 
+ */ + + skb->len = offset + sizeof(struct icmphdr) + sizeof(struct iphdr) + 8; + + /* + * Fill in the frame + */ + + icmph = (struct icmphdr *) (skb->data + offset); + icmph->type = type; + icmph->code = code; + icmph->checksum = 0; + icmph->un.gateway = info; /* This might not be meant for + this form of the union but it will + be right anyway */ + memcpy(icmph + 1, iph, sizeof(struct iphdr) + 8); + + icmph->checksum = ip_compute_csum((unsigned char *)icmph, + sizeof(struct icmphdr) + sizeof(struct iphdr) + 8); + + /* + * Send it and free it once sent. + */ + ip_queue_xmit(NULL, ndev, skb, 1); +} + + +/* + * Handle ICMP_UNREACH and ICMP_QUENCH. + */ + +static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb) +{ + struct inet_protocol *ipprot; + struct iphdr *iph; + unsigned char hash; + int err; + + err = (icmph->type << 8) | icmph->code; + iph = (struct iphdr *) (icmph + 1); + + switch(icmph->code & 7) + { + case ICMP_NET_UNREACH: + break; + case ICMP_HOST_UNREACH: + break; + case ICMP_PROT_UNREACH: + printk("ICMP: %s:%d: protocol unreachable.\n", + in_ntoa(iph->daddr), ntohs(iph->protocol)); + break; + case ICMP_PORT_UNREACH: + break; + case ICMP_FRAG_NEEDED: + printk("ICMP: %s: fragmentation needed and DF set.\n", + in_ntoa(iph->daddr)); + break; + case ICMP_SR_FAILED: + printk("ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); + break; + default: + break; + } + + /* + * Get the protocol(s). + */ + + hash = iph->protocol & (MAX_INET_PROTOS -1); + + /* + * This can't change while we are doing it. + */ + + ipprot = (struct inet_protocol *) inet_protos[hash]; + while(ipprot != NULL) + { + struct inet_protocol *nextip; + + nextip = (struct inet_protocol *) ipprot->next; + + /* + * Pass it off to everyone who wants it. 
+ */ + if (iph->protocol == ipprot->protocol && ipprot->err_handler) + { + ipprot->err_handler(err, (unsigned char *)(icmph + 1), + iph->daddr, iph->saddr, ipprot); + } + + ipprot = nextip; + } + kfree_skb(skb, FREE_READ); +} + + +/* + * Handle ICMP_REDIRECT. + */ + +static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, + struct device *dev, unsigned long source) +{ + struct rtable *rt; + struct iphdr *iph; + unsigned long ip; + + /* + * Get the copied header of the packet that caused the redirect + */ + + iph = (struct iphdr *) (icmph + 1); + ip = iph->daddr; + + switch(icmph->code & 7) + { + case ICMP_REDIR_NET: + /* + * This causes a problem with subnetted networks. What we should do + * is use ICMP_ADDRESS to get the subnet mask of the problem route + * and set both. But we don't.. + */ +#ifdef not_a_good_idea + ip_rt_add((RTF_DYNAMIC | RTF_MODIFIED | RTF_GATEWAY), + ip, 0, icmph->un.gateway, dev,0, 0); + break; +#endif + case ICMP_REDIR_HOST: + /* + * Add better route to host. + * But first check that the redirect + * comes from the old gateway.. + * And make sure it's an ok host address + * (not some confused thing sending our + * address) + */ + rt = ip_rt_route(ip, NULL, NULL); + if (!rt) + break; + if (rt->rt_gateway != source || ip_chk_addr(icmph->un.gateway)) + break; + printk("redirect from %s\n", in_ntoa(source)); + ip_rt_add((RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY), + ip, 0, icmph->un.gateway, dev,0, 0); + break; + case ICMP_REDIR_NETTOS: + case ICMP_REDIR_HOSTTOS: + printk("ICMP: cannot handle TOS redirects yet!\n"); + break; + default: + break; + } + + /* + * Discard the original packet + */ + + kfree_skb(skb, FREE_READ); +} + + +/* + * Handle ICMP_ECHO ("ping") requests. 
+ */ + +static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, struct device *dev, + unsigned long saddr, unsigned long daddr, int len, + struct options *opt) +{ + struct icmphdr *icmphr; + struct sk_buff *skb2; + struct device *ndev=NULL; + int size, offset; + + icmp_statistics.IcmpOutEchoReps++; + icmp_statistics.IcmpOutMsgs++; + + size = dev->hard_header_len + 64 + len; + skb2 = alloc_skb(size, GFP_ATOMIC); + + if (skb2 == NULL) + { + icmp_statistics.IcmpOutErrors++; + kfree_skb(skb, FREE_READ); + return; + } + skb2->free = 1; + + /* Build Layer 2-3 headers for message back to source */ + offset = ip_build_header(skb2, daddr, saddr, &ndev, + IPPROTO_ICMP, opt, len, skb->ip_hdr->tos,255); + if (offset < 0) + { + icmp_statistics.IcmpOutErrors++; + printk("ICMP: Could not build IP Header for ICMP ECHO Response\n"); + kfree_skb(skb2,FREE_WRITE); + kfree_skb(skb, FREE_READ); + return; + } + + /* + * Re-adjust length according to actual IP header size. + */ + + skb2->len = offset + len; + + /* + * Build ICMP_ECHO Response message. + */ + icmphr = (struct icmphdr *) (skb2->data + offset); + memcpy((char *) icmphr, (char *) icmph, len); + icmphr->type = ICMP_ECHOREPLY; + icmphr->code = 0; + icmphr->checksum = 0; + icmphr->checksum = ip_compute_csum((unsigned char *)icmphr, len); + + /* + * Ship it out - free it when done + */ + ip_queue_xmit((struct sock *)NULL, ndev, skb2, 1); + + /* + * Free the received frame + */ + + kfree_skb(skb, FREE_READ); +} + +/* + * Handle ICMP Timestamp requests. 
+ */ + +static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, struct device *dev, + unsigned long saddr, unsigned long daddr, int len, + struct options *opt) +{ + struct icmphdr *icmphr; + struct sk_buff *skb2; + int size, offset; + unsigned long *timeptr, midtime; + struct device *ndev=NULL; + + if (len != 20) + { + printk( + "ICMP: Size (%d) of ICMP_TIMESTAMP request should be 20!\n", + len); + icmp_statistics.IcmpInErrors++; +#if 1 + /* correct answers are possible for everything >= 12 */ + if (len < 12) +#endif + return; + } + + size = dev->hard_header_len + 84; + + if (! (skb2 = alloc_skb(size, GFP_ATOMIC))) + { + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + icmp_statistics.IcmpOutErrors++; + return; + } + skb2->free = 1; + +/* + * Build Layer 2-3 headers for message back to source + */ + + offset = ip_build_header(skb2, daddr, saddr, &ndev, IPPROTO_ICMP, opt, len, + skb->ip_hdr->tos, 255); + if (offset < 0) + { + printk("ICMP: Could not build IP Header for ICMP TIMESTAMP Response\n"); + kfree_skb(skb2, FREE_WRITE); + kfree_skb(skb, FREE_READ); + icmp_statistics.IcmpOutErrors++; + return; + } + + /* + * Re-adjust length according to actual IP header size. + */ + skb2->len = offset + 20; + + /* + * Build ICMP_TIMESTAMP Response message. 
+ */ + + icmphr = (struct icmphdr *) ((char *) (skb2 + 1) + offset); + memcpy((char *) icmphr, (char *) icmph, 12); + icmphr->type = ICMP_TIMESTAMPREPLY; + icmphr->code = icmphr->checksum = 0; + + /* fill in the current time as ms since midnight UT: */ + midtime = (xtime.tv_sec % 86400) * 1000 + xtime.tv_usec / 1000; + timeptr = (unsigned long *) (icmphr + 1); + /* + * the originate timestamp (timeptr [0]) is still in the copy: + */ + timeptr [1] = timeptr [2] = htonl(midtime); + + icmphr->checksum = ip_compute_csum((unsigned char *) icmphr, 20); + + /* + * Ship it out - free it when done + */ + + ip_queue_xmit((struct sock *) NULL, ndev, skb2, 1); + icmp_statistics.IcmpOutTimestampReps++; + kfree_skb(skb, FREE_READ); +} + + + + +/* + * Handle the ICMP INFORMATION REQUEST. + */ + +static void icmp_info(struct icmphdr *icmph, struct sk_buff *skb, struct device *dev, + unsigned long saddr, unsigned long daddr, int len, + struct options *opt) +{ + /* Obsolete */ + kfree_skb(skb, FREE_READ); +} + + +/* + * Handle ICMP_ADDRESS_MASK requests. 
+ */ + +static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, struct device *dev, + unsigned long saddr, unsigned long daddr, int len, + struct options *opt) +{ + struct icmphdr *icmphr; + struct sk_buff *skb2; + int size, offset; + struct device *ndev=NULL; + + icmp_statistics.IcmpOutMsgs++; + icmp_statistics.IcmpOutAddrMaskReps++; + + size = dev->hard_header_len + 64 + len; + skb2 = alloc_skb(size, GFP_ATOMIC); + if (skb2 == NULL) + { + icmp_statistics.IcmpOutErrors++; + kfree_skb(skb, FREE_READ); + return; + } + skb2->free = 1; + + /* + * Build Layer 2-3 headers for message back to source + */ + + offset = ip_build_header(skb2, daddr, saddr, &ndev, + IPPROTO_ICMP, opt, len, skb->ip_hdr->tos,255); + if (offset < 0) + { + icmp_statistics.IcmpOutErrors++; + printk("ICMP: Could not build IP Header for ICMP ADDRESS Response\n"); + kfree_skb(skb2,FREE_WRITE); + kfree_skb(skb, FREE_READ); + return; + } + + /* + * Re-adjust length according to actual IP header size. + */ + + skb2->len = offset + len; + + /* + * Build ICMP ADDRESS MASK Response message. + */ + + icmphr = (struct icmphdr *) (skb2->data + offset); + icmphr->type = ICMP_ADDRESSREPLY; + icmphr->code = 0; + icmphr->checksum = 0; + icmphr->un.echo.id = icmph->un.echo.id; + icmphr->un.echo.sequence = icmph->un.echo.sequence; + memcpy((char *) (icmphr + 1), (char *) &dev->pa_mask, sizeof(dev->pa_mask)); + + icmphr->checksum = ip_compute_csum((unsigned char *)icmphr, len); + + /* Ship it out - free it when done */ + ip_queue_xmit((struct sock *)NULL, ndev, skb2, 1); + + skb->sk = NULL; + kfree_skb(skb, FREE_READ); +} + + +/* + * Deal with incoming ICMP packets. + */ + +int icmp_rcv(struct sk_buff *skb1, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, + unsigned long saddr, int redo, struct inet_protocol *protocol) +{ + struct icmphdr *icmph; + unsigned char *buff; + + /* + * Drop broadcast packets. 
IP has done a broadcast check and ought one day + * to pass on that information. + */ + + icmp_statistics.IcmpInMsgs++; + + + /* + * Grab the packet as an icmp object + */ + + buff = skb1->h.raw; + icmph = (struct icmphdr *) buff; + + /* + * Validate the packet first + */ + + if (ip_compute_csum((unsigned char *) icmph, len)) + { + /* Failed checksum! */ + icmp_statistics.IcmpInErrors++; + printk("ICMP: failed checksum from %s!\n", in_ntoa(saddr)); + kfree_skb(skb1, FREE_READ); + return(0); + } + + /* + * Parse the ICMP message + */ + + if (ip_chk_addr(daddr) != IS_MYADDR) + { + if (icmph->type != ICMP_ECHO) + { + icmp_statistics.IcmpInErrors++; + kfree_skb(skb1, FREE_READ); + return(0); + } + daddr=dev->pa_addr; + } + + switch(icmph->type) + { + case ICMP_TIME_EXCEEDED: + icmp_statistics.IcmpInTimeExcds++; + icmp_unreach(icmph, skb1); + return 0; + case ICMP_DEST_UNREACH: + icmp_statistics.IcmpInDestUnreachs++; + icmp_unreach(icmph, skb1); + return 0; + case ICMP_SOURCE_QUENCH: + icmp_statistics.IcmpInSrcQuenchs++; + icmp_unreach(icmph, skb1); + return(0); + case ICMP_REDIRECT: + icmp_statistics.IcmpInRedirects++; + icmp_redirect(icmph, skb1, dev, saddr); + return(0); + case ICMP_ECHO: + icmp_statistics.IcmpInEchos++; + icmp_echo(icmph, skb1, dev, saddr, daddr, len, opt); + return 0; + case ICMP_ECHOREPLY: + icmp_statistics.IcmpInEchoReps++; + kfree_skb(skb1, FREE_READ); + return(0); + case ICMP_TIMESTAMP: + icmp_statistics.IcmpInTimestamps++; + icmp_timestamp(icmph, skb1, dev, saddr, daddr, len, opt); + return 0; + case ICMP_TIMESTAMPREPLY: + icmp_statistics.IcmpInTimestampReps++; + kfree_skb(skb1,FREE_READ); + return 0; + /* INFO is obsolete and doesn't even feature in the SNMP stats */ + case ICMP_INFO_REQUEST: + icmp_info(icmph, skb1, dev, saddr, daddr, len, opt); + return 0; + case ICMP_INFO_REPLY: + skb1->sk = NULL; + kfree_skb(skb1, FREE_READ); + return(0); + case ICMP_ADDRESS: + icmp_statistics.IcmpInAddrMasks++; + icmp_address(icmph, skb1, dev, saddr, 
daddr, len, opt); + return 0; + case ICMP_ADDRESSREPLY: + /* + * We ought to set our netmask on receiving this, but + * experience shows it's a waste of effort. + */ + icmp_statistics.IcmpInAddrMaskReps++; + kfree_skb(skb1, FREE_READ); + return(0); + default: + icmp_statistics.IcmpInErrors++; + kfree_skb(skb1, FREE_READ); + return(0); + } + /*NOTREACHED*/ + kfree_skb(skb1, FREE_READ); + return(-1); +} + + +/* + * Perform any ICMP-related I/O control requests. + * [to vanish soon] + */ + +int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } + return(0); +} diff --git a/pfinet/linux-inet/icmp.h b/pfinet/linux-inet/icmp.h new file mode 100644 index 00000000..8f1c3498 --- /dev/null +++ b/pfinet/linux-inet/icmp.h @@ -0,0 +1,38 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the ICMP module. + * + * Version: @(#)icmp.h 1.0.4 05/13/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _ICMP_H +#define _ICMP_H + +#include <linux/icmp.h> + + +extern struct icmp_err icmp_err_convert[]; +extern struct icmp_mib icmp_statistics; + + +extern void icmp_send(struct sk_buff *skb_in, int type, int code, + unsigned long info, struct device *dev); +extern int icmp_rcv(struct sk_buff *skb1, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, + int redo, struct inet_protocol *protocol); + +extern int icmp_ioctl(struct sock *sk, int cmd, + unsigned long arg); + +#endif /* _ICMP_H */ diff --git a/pfinet/linux-inet/igmp.c b/pfinet/linux-inet/igmp.c new file mode 100644 index 00000000..32e42213 --- /dev/null +++ b/pfinet/linux-inet/igmp.c @@ -0,0 +1,390 @@ +/* + * Linux NET3: Internet Gateway Management Protocol [IGMP] + * + * Authors: + * Alan Cox <Alan.Cox@linux.org> + * + * WARNING: + * This is a 'preliminary' implementation... on your own head + * be it. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include "route.h" +#include <linux/skbuff.h> +#include "sock.h" +#include <linux/igmp.h> + +#ifdef CONFIG_IP_MULTICAST + + +/* + * Timer management + */ + + +static void igmp_stop_timer(struct ip_mc_list *im) +{ + del_timer(&im->timer); + im->tm_running=0; +} + +static int igmp_random(void) +{ + static unsigned long seed=152L; + seed=seed*69069L+1; + return seed^jiffies; +} + + +static void igmp_start_timer(struct ip_mc_list *im) +{ + int tv; + if(im->tm_running) + return; + tv=igmp_random()%(10*HZ); /* Pick a number any number 8) */ + im->timer.expires=tv; + im->tm_running=1; + add_timer(&im->timer); +} + +/* + * Send an IGMP report. 
+ */ + +#define MAX_IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+64) + +static void igmp_send_report(struct device *dev, unsigned long address, int type) +{ + struct sk_buff *skb=alloc_skb(MAX_IGMP_SIZE, GFP_ATOMIC); + int tmp; + struct igmphdr *igh; + + if(skb==NULL) + return; + tmp=ip_build_header(skb, INADDR_ANY, address, &dev, IPPROTO_IGMP, NULL, + skb->mem_len, 0, 1); + if(tmp<0) + { + kfree_skb(skb, FREE_WRITE); + return; + } + igh=(struct igmphdr *)(skb->data+tmp); + skb->len=tmp+sizeof(*igh); + igh->csum=0; + igh->unused=0; + igh->type=type; + igh->group=address; + igh->csum=ip_compute_csum((void *)igh,sizeof(*igh)); + ip_queue_xmit(NULL,dev,skb,1); +} + + +static void igmp_timer_expire(unsigned long data) +{ + struct ip_mc_list *im=(struct ip_mc_list *)data; + igmp_stop_timer(im); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); +} + +static void igmp_init_timer(struct ip_mc_list *im) +{ + im->tm_running=0; + init_timer(&im->timer); + im->timer.data=(unsigned long)im; + im->timer.function=&igmp_timer_expire; +} + + +static void igmp_heard_report(struct device *dev, unsigned long address) +{ + struct ip_mc_list *im; + for(im=dev->ip_mc_list;im!=NULL;im=im->next) + if(im->multiaddr==address) + igmp_stop_timer(im); +} + +static void igmp_heard_query(struct device *dev) +{ + struct ip_mc_list *im; + for(im=dev->ip_mc_list;im!=NULL;im=im->next) + if(!im->tm_running && im->multiaddr!=IGMP_ALL_HOSTS) + igmp_start_timer(im); +} + +/* + * Map a multicast IP onto multicast MAC for type ethernet. 
+ */ + +static void ip_mc_map(unsigned long addr, char *buf) +{ + addr=ntohl(addr); + buf[0]=0x01; + buf[1]=0x00; + buf[2]=0x5e; + buf[5]=addr&0xFF; + addr>>=8; + buf[4]=addr&0xFF; + addr>>=8; + buf[3]=addr&0x7F; +} + +/* + * Add a filter to a device + */ + +void ip_mc_filter_add(struct device *dev, unsigned long addr) +{ + char buf[6]; + if(dev->type!=ARPHRD_ETHER) + return; /* Only do ethernet now */ + ip_mc_map(addr,buf); + dev_mc_add(dev,buf,ETH_ALEN,0); +} + +/* + * Remove a filter from a device + */ + +void ip_mc_filter_del(struct device *dev, unsigned long addr) +{ + char buf[6]; + if(dev->type!=ARPHRD_ETHER) + return; /* Only do ethernet now */ + ip_mc_map(addr,buf); + dev_mc_delete(dev,buf,ETH_ALEN,0); +} + +static void igmp_group_dropped(struct ip_mc_list *im) +{ + del_timer(&im->timer); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE); + ip_mc_filter_del(im->interface, im->multiaddr); +/* printk("Left group %lX\n",im->multiaddr);*/ +} + +static void igmp_group_added(struct ip_mc_list *im) +{ + igmp_init_timer(im); + igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); + ip_mc_filter_add(im->interface, im->multiaddr); +/* printk("Joined group %lX\n",im->multiaddr);*/ +} + +int igmp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, unsigned long saddr, int redo, + struct inet_protocol *protocol) +{ + /* This basically follows the spec line by line -- see RFC1112 */ + struct igmphdr *igh=(struct igmphdr *)skb->h.raw; + + if(skb->ip_hdr->ttl!=1 || ip_compute_csum((void *)igh,sizeof(*igh))) + { + kfree_skb(skb, FREE_READ); + return 0; + } + + if(igh->type==IGMP_HOST_MEMBERSHIP_QUERY && daddr==IGMP_ALL_HOSTS) + igmp_heard_query(dev); + if(igh->type==IGMP_HOST_MEMBERSHIP_REPORT && daddr==igh->group) + igmp_heard_report(dev,igh->group); + kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * Multicast list managers + */ + + +/* + * A socket has joined a 
multicast group on device dev. + */ + +static void ip_mc_inc_group(struct device *dev, unsigned long addr) +{ + struct ip_mc_list *i; + for(i=dev->ip_mc_list;i!=NULL;i=i->next) + { + if(i->multiaddr==addr) + { + i->users++; + return; + } + } + i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); + if(!i) + return; + i->users=1; + i->interface=dev; + i->multiaddr=addr; + i->next=dev->ip_mc_list; + igmp_group_added(i); + dev->ip_mc_list=i; +} + +/* + * A socket has left a multicast group on device dev + */ + +static void ip_mc_dec_group(struct device *dev, unsigned long addr) +{ + struct ip_mc_list **i; + for(i=&(dev->ip_mc_list);(*i)!=NULL;i=&(*i)->next) + { + if((*i)->multiaddr==addr) + { + if(--((*i)->users)) + return; + else + { + struct ip_mc_list *tmp= *i; + igmp_group_dropped(tmp); + *i=(*i)->next; + kfree_s(tmp,sizeof(*tmp)); + } + } + } +} + +/* + * Device going down: Clean up. + */ + +void ip_mc_drop_device(struct device *dev) +{ + struct ip_mc_list *i; + struct ip_mc_list *j; + for(i=dev->ip_mc_list;i!=NULL;i=j) + { + j=i->next; + kfree_s(i,sizeof(*i)); + } + dev->ip_mc_list=NULL; +} + +/* + * Device going up. 
Make sure it is in all hosts + */ + +void ip_mc_allhost(struct device *dev) +{ + struct ip_mc_list *i; + for(i=dev->ip_mc_list;i!=NULL;i=i->next) + if(i->multiaddr==IGMP_ALL_HOSTS) + return; + i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); + if(!i) + return; + i->users=1; + i->interface=dev; + i->multiaddr=IGMP_ALL_HOSTS; + i->next=dev->ip_mc_list; + dev->ip_mc_list=i; + ip_mc_filter_add(i->interface, i->multiaddr); + +} + +/* + * Join a socket to a group + */ + +int ip_mc_join_group(struct sock *sk , struct device *dev, unsigned long addr) +{ + int unused= -1; + int i; + if(!MULTICAST(addr)) + return -EINVAL; + if(!(dev->flags&IFF_MULTICAST)) + return -EADDRNOTAVAIL; + if(sk->ip_mc_list==NULL) + { + if((sk->ip_mc_list=(struct ip_mc_socklist *)kmalloc(sizeof(*sk->ip_mc_list), GFP_KERNEL))==NULL) + return -ENOMEM; + memset(sk->ip_mc_list,'\0',sizeof(*sk->ip_mc_list)); + } + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) + return -EADDRINUSE; + if(sk->ip_mc_list->multidev[i]==NULL) + unused=i; + } + + if(unused==-1) + return -ENOBUFS; + sk->ip_mc_list->multiaddr[unused]=addr; + sk->ip_mc_list->multidev[unused]=dev; + ip_mc_inc_group(dev,addr); + return 0; +} + +/* + * Ask a socket to leave a group. + */ + +int ip_mc_leave_group(struct sock *sk, struct device *dev, unsigned long addr) +{ + int i; + if(!MULTICAST(addr)) + return -EINVAL; + if(!(dev->flags&IFF_MULTICAST)) + return -EADDRNOTAVAIL; + if(sk->ip_mc_list==NULL) + return -EADDRNOTAVAIL; + + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) + { + sk->ip_mc_list->multidev[i]=NULL; + ip_mc_dec_group(dev,addr); + return 0; + } + } + return -EADDRNOTAVAIL; +} + +/* + * A socket is closing. 
+ */ + +void ip_mc_drop_socket(struct sock *sk) +{ + int i; + + if(sk->ip_mc_list==NULL) + return; + + for(i=0;i<IP_MAX_MEMBERSHIPS;i++) + { + if(sk->ip_mc_list->multidev[i]) + { + ip_mc_dec_group(sk->ip_mc_list->multidev[i], sk->ip_mc_list->multiaddr[i]); + sk->ip_mc_list->multidev[i]=NULL; + } + } + kfree_s(sk->ip_mc_list,sizeof(*sk->ip_mc_list)); + sk->ip_mc_list=NULL; +} + +#endif diff --git a/pfinet/linux-inet/ip.c b/pfinet/linux-inet/ip.c new file mode 100644 index 00000000..dd188f54 --- /dev/null +++ b/pfinet/linux-inet/ip.c @@ -0,0 +1,2427 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The Internet Protocol (IP) module. + * + * Version: @(#)ip.c 1.0.16b 9/1/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Donald Becker, <becker@super.org> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Richard Underwood + * Stefan Becker, <stefanb@yello.ping.de> + * + * + * Fixes: + * Alan Cox : Commented a couple of minor bits of surplus code + * Alan Cox : Undefining IP_FORWARD doesn't include the code + * (just stops a compiler warning). + * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes + * are junked rather than corrupting things. + * Alan Cox : Frames to bad broadcast subnets are dumped + * We used to process them non broadcast and + * boy could that cause havoc. + * Alan Cox : ip_forward sets the free flag on the + * new frame it queues. Still crap because + * it copies the frame but at least it + * doesn't eat memory too. + * Alan Cox : Generic queue code and memory fixes. + * Fred Van Kempen : IP fragment support (borrowed from NET2E) + * Gerhard Koerting: Forward fragmented frames correctly. + * Gerhard Koerting: Fixes to my fix of the above 8-). + * Gerhard Koerting: IP interface addressing fix. 
+ * Linus Torvalds : More robustness checks + * Alan Cox : Even more checks: Still not as robust as it ought to be + * Alan Cox : Save IP header pointer for later + * Alan Cox : ip option setting + * Alan Cox : Use ip_tos/ip_ttl settings + * Alan Cox : Fragmentation bogosity removed + * (Thanks to Mark.Bush@prg.ox.ac.uk) + * Dmitry Gorodchanin : Send of a raw packet crash fix. + * Alan Cox : Silly ip bug when an overlength + * fragment turns up. Now frees the + * queue. + * Linus Torvalds/ : Memory leakage on fragmentation + * Alan Cox : handling. + * Gerhard Koerting: Forwarding uses IP priority hints + * Teemu Rantanen : Fragment problems. + * Alan Cox : General cleanup, comments and reformat + * Alan Cox : SNMP statistics + * Alan Cox : BSD address rule semantics. Also see + * UDP as there is a nasty checksum issue + * if you do things the wrong way. + * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file + * Alan Cox : IP options adjust sk->priority. + * Pedro Roque : Fix mtu/length error in ip_forward. + * Alan Cox : Avoid ip_chk_addr when possible. + * Richard Underwood : IP multicasting. + * Alan Cox : Cleaned up multicast handlers. + * Alan Cox : RAW sockets demultiplex in the BSD style. + * Gunther Mayer : Fix the SNMP reporting typo + * Alan Cox : Always in group 224.0.0.1 + * Alan Cox : Multicast loopback error for 224.0.0.1 + * Alan Cox : IP_MULTICAST_LOOP option. + * Alan Cox : Use notifiers. + * Bjorn Ekwall : Removed ip_csum (from slhc.c too) + * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) + * Stefan Becker : Send out ICMP HOST REDIRECT + * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. + * + * + * To Fix: + * IP option processing is mostly not needed. ip_forward needs to know about routing rules + * and time stamp but that's about all. Use the route mtu field here too + * IP fragmentation wants rewriting cleanly. 
The RFC815 algorithm is much more efficient + * and could be made very efficient with the addition of some virtual memory hacks to permit + * the allocation of a buffer that can then be 'grown' by twiddling page tables. + * Output fragmentation wants updating along with the buffer management to use a single + * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet + * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause + * fragmentation anyway. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "snmp.h" +#include "ip.h" +#include "protocol.h" +#include "route.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "arp.h" +#include "icmp.h" +#include "raw.h" +#include <linux/igmp.h> +#include <linux/ip_fw.h> + +#define CONFIG_IP_DEFRAG + +extern int last_retran; +extern void sort_send(struct sock *sk); + +#define min(a,b) ((a)<(b)?(a):(b)) +#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) + +/* + * SNMP management statistics + */ + +#ifdef CONFIG_IP_FORWARD +struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ +#else +struct ip_mib ip_statistics={0,64,}; /* Forwarding=No, Default TTL=64 */ +#endif + +/* + * Handle the issuing of an ioctl() request + * for the ip device. 
This is scheduled to + * disappear + */ + +int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + + +/* these two routines will do routing. */ + +static void +strict_route(struct iphdr *iph, struct options *opt) +{ +} + + +static void +loose_route(struct iphdr *iph, struct options *opt) +{ +} + + + + +/* This routine will check to see if we have lost a gateway. */ +void +ip_route_check(unsigned long daddr) +{ +} + + +#if 0 +/* this routine puts the options at the end of an ip header. */ +static int +build_options(struct iphdr *iph, struct options *opt) +{ + unsigned char *ptr; + /* currently we don't support any options. */ + ptr = (unsigned char *)(iph+1); + *ptr = 0; + return (4); +} +#endif + + +/* + * Take an skb, and fill in the MAC header. + */ + +static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr) +{ + int mac = 0; + + skb->dev = dev; + skb->arp = 1; + if (dev->hard_header) + { + /* + * Build a hardware header. Source address is our mac, destination unknown + * (rebuild header will sort this out) + */ + mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb); + if (mac < 0) + { + mac = -mac; + skb->arp = 0; + skb->raddr = daddr; /* next routing address */ + } + } + return mac; +} + +int ip_id_count = 0; + +/* + * This routine builds the appropriate hardware/IP headers for + * the routine. It assumes that if *dev != NULL then the + * protocol knows what it's doing, otherwise it uses the + * routing/ARP tables to select a device struct. + */ +int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr, + struct device **dev, int type, struct options *opt, int len, int tos, int ttl) +{ + static struct options optmem; + struct iphdr *iph; + struct rtable *rt; + unsigned char *buff; + unsigned long raddr; + int tmp; + unsigned long src; + + buff = skb->data; + + /* + * See if we need to look up the device. 
+ */ + +#ifdef CONFIG_INET_MULTICAST + if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name) + *dev=dev_get(skb->sk->ip_mc_name); +#endif + if (*dev == NULL) + { + if(skb->localroute) + rt = ip_rt_local(daddr, &optmem, &src); + else + rt = ip_rt_route(daddr, &optmem, &src); + if (rt == NULL) + { + ip_statistics.IpOutNoRoutes++; + return(-ENETUNREACH); + } + + *dev = rt->rt_dev; + /* + * If the frame is from us and going off machine it MUST MUST MUST + * have the output device ip address and never the loopback + */ + if (LOOPBACK(saddr) && !LOOPBACK(daddr)) + saddr = src;/*rt->rt_dev->pa_addr;*/ + raddr = rt->rt_gateway; + + opt = &optmem; + } + else + { + /* + * We still need the address of the first hop. + */ + if(skb->localroute) + rt = ip_rt_local(daddr, &optmem, &src); + else + rt = ip_rt_route(daddr, &optmem, &src); + /* + * If the frame is from us and going off machine it MUST MUST MUST + * have the output device ip address and never the loopback + */ + if (LOOPBACK(saddr) && !LOOPBACK(daddr)) + saddr = src;/*rt->rt_dev->pa_addr;*/ + + raddr = (rt == NULL) ? 0 : rt->rt_gateway; + } + + /* + * No source addr so make it our addr + */ + if (saddr == 0) + saddr = src; + + /* + * No gateway so aim at the real destination + */ + if (raddr == 0) + raddr = daddr; + + /* + * Now build the MAC header. + */ + + tmp = ip_send(skb, raddr, len, *dev, saddr); + buff += tmp; + len -= tmp; + + /* + * Book keeping + */ + + skb->dev = *dev; + skb->saddr = saddr; + if (skb->sk) + skb->sk->saddr = saddr; + + /* + * Now build the IP header. + */ + + /* + * If we are using IPPROTO_RAW, then we don't need an IP header, since + * one is being supplied to us by the user + */ + + if(type == IPPROTO_RAW) + return (tmp); + + iph = (struct iphdr *)buff; + iph->version = 4; + iph->tos = tos; + iph->frag_off = 0; + iph->ttl = ttl; + iph->daddr = daddr; + iph->saddr = saddr; + iph->protocol = type; + iph->ihl = 5; + skb->ip_hdr = iph; + + /* Setup the IP options. 
*/ +#ifdef Not_Yet_Avail + build_options(iph, opt); +#endif + + return(20 + tmp); /* IP header plus MAC header size */ +} + + +static int +do_options(struct iphdr *iph, struct options *opt) +{ + unsigned char *buff; + int done = 0; + int i, len = sizeof(struct iphdr); + + /* Zero out the options. */ + opt->record_route.route_size = 0; + opt->loose_route.route_size = 0; + opt->strict_route.route_size = 0; + opt->tstamp.ptr = 0; + opt->security = 0; + opt->compartment = 0; + opt->handling = 0; + opt->stream = 0; + opt->tcc = 0; + return(0); + + /* Advance the pointer to start at the options. */ + buff = (unsigned char *)(iph + 1); + + /* Now start the processing. */ + while (!done && len < iph->ihl*4) switch(*buff) { + case IPOPT_END: + done = 1; + break; + case IPOPT_NOOP: + buff++; + len++; + break; + case IPOPT_SEC: + buff++; + if (*buff != 11) return(1); + buff++; + opt->security = ntohs(*(unsigned short *)buff); + buff += 2; + opt->compartment = ntohs(*(unsigned short *)buff); + buff += 2; + opt->handling = ntohs(*(unsigned short *)buff); + buff += 2; + opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1)); + buff += 3; + len += 11; + break; + case IPOPT_LSRR: + buff++; + if ((*buff - 3)% 4 != 0) return(1); + len += *buff; + opt->loose_route.route_size = (*buff -3)/4; + buff++; + if (*buff % 4 != 0) return(1); + opt->loose_route.pointer = *buff/4 - 1; + buff++; + buff++; + for (i = 0; i < opt->loose_route.route_size; i++) { + if(i>=MAX_ROUTE) + return(1); + opt->loose_route.route[i] = *(unsigned long *)buff; + buff += 4; + } + break; + case IPOPT_SSRR: + buff++; + if ((*buff - 3)% 4 != 0) return(1); + len += *buff; + opt->strict_route.route_size = (*buff -3)/4; + buff++; + if (*buff % 4 != 0) return(1); + opt->strict_route.pointer = *buff/4 - 1; + buff++; + buff++; + for (i = 0; i < opt->strict_route.route_size; i++) { + if(i>=MAX_ROUTE) + return(1); + opt->strict_route.route[i] = *(unsigned long *)buff; + buff += 4; + } + break; + case IPOPT_RR: + 
buff++; + if ((*buff - 3)% 4 != 0) return(1); + len += *buff; + opt->record_route.route_size = (*buff -3)/4; + buff++; + if (*buff % 4 != 0) return(1); + opt->record_route.pointer = *buff/4 - 1; + buff++; + buff++; + for (i = 0; i < opt->record_route.route_size; i++) { + if(i>=MAX_ROUTE) + return 1; + opt->record_route.route[i] = *(unsigned long *)buff; + buff += 4; + } + break; + case IPOPT_SID: + len += 4; + buff +=2; + opt->stream = *(unsigned short *)buff; + buff += 2; + break; + case IPOPT_TIMESTAMP: + buff++; + len += *buff; + if (*buff % 4 != 0) return(1); + opt->tstamp.len = *buff / 4 - 1; + buff++; + if ((*buff - 1) % 4 != 0) return(1); + opt->tstamp.ptr = (*buff-1)/4; + buff++; + opt->tstamp.x.full_char = *buff; + buff++; + for (i = 0; i < opt->tstamp.len; i++) { + opt->tstamp.data[i] = *(unsigned long *)buff; + buff += 4; + } + break; + default: + return(1); + } + + if (opt->record_route.route_size == 0) { + if (opt->strict_route.route_size != 0) { + memcpy(&(opt->record_route), &(opt->strict_route), + sizeof(opt->record_route)); + } else if (opt->loose_route.route_size != 0) { + memcpy(&(opt->record_route), &(opt->loose_route), + sizeof(opt->record_route)); + } + } + + if (opt->strict_route.route_size != 0 && + opt->strict_route.route_size != opt->strict_route.pointer) { + strict_route(iph, opt); + return(0); + } + + if (opt->loose_route.route_size != 0 && + opt->loose_route.route_size != opt->loose_route.pointer) { + loose_route(iph, opt); + return(0); + } + + return(0); +} + +/* + * This routine does all the checksum computations that don't + * require anything special (like copying or special headers). + */ + +unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + unsigned long sum = 0; + + /* Do the first multiple of 4 bytes and convert to 16 bits. 
*/ + if (len > 3) + { + __asm__("clc\n" + "1:\t" + "lodsl\n\t" + "adcl %%eax, %%ebx\n\t" + "loop 1b\n\t" + "adcl $0, %%ebx\n\t" + "movl %%ebx, %%eax\n\t" + "shrl $16, %%eax\n\t" + "addw %%ax, %%bx\n\t" + "adcw $0, %%bx" + : "=b" (sum) , "=S" (buff) + : "0" (sum), "c" (len >> 2) ,"1" (buff) + : "ax", "cx", "si", "bx" ); + } + if (len & 2) + { + __asm__("lodsw\n\t" + "addw %%ax, %%bx\n\t" + "adcw $0, %%bx" + : "=b" (sum), "=S" (buff) + : "0" (sum), "1" (buff) + : "bx", "ax", "si"); + } + if (len & 1) + { + __asm__("lodsb\n\t" + "movb $0, %%ah\n\t" + "addw %%ax, %%bx\n\t" + "adcw $0, %%bx" + : "=b" (sum), "=S" (buff) + : "0" (sum), "1" (buff) + : "bx", "ax", "si"); + } + sum =~sum; + return(sum & 0xffff); +} + +/* + * Generate a checksum for an outgoing IP datagram. + */ + +void ip_send_check(struct iphdr *iph) +{ + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); +} + +/************************ Fragment Handlers From NET2E **********************************/ + + +/* + * This fragment handler is a bit of a heap. On the other hand it works quite + * happily and handles things quite well. + */ + +static struct ipq *ipqueue = NULL; /* IP fragment queue */ + +/* + * Create a new fragment entry. + */ + +static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr) +{ + struct ipfrag *fp; + + fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); + if (fp == NULL) + { + printk("IP: frag_create: no memory left !\n"); + return(NULL); + } + memset(fp, 0, sizeof(struct ipfrag)); + + /* Fill in the structure. */ + fp->offset = offset; + fp->end = end; + fp->len = end - offset; + fp->skb = skb; + fp->ptr = ptr; + + return(fp); +} + + +/* + * Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and return the queue entry address if found. 
+ */ + +static struct ipq *ip_find(struct iphdr *iph) +{ + struct ipq *qp; + struct ipq *qplast; + + cli(); + qplast = NULL; + for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) + { + if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr && + iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) + { + del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ + sti(); + return(qp); + } + } + sti(); + return(NULL); +} + + +/* + * Remove an entry from the "incomplete datagrams" queue, either + * because we completed, reassembled and processed it, or because + * it timed out. + */ + +static void ip_free(struct ipq *qp) +{ + struct ipfrag *fp; + struct ipfrag *xp; + + /* + * Stop the timer for this entry. + */ + + del_timer(&qp->timer); + + /* Remove this entry from the "incomplete datagrams" queue. */ + cli(); + if (qp->prev == NULL) + { + ipqueue = qp->next; + if (ipqueue != NULL) + ipqueue->prev = NULL; + } + else + { + qp->prev->next = qp->next; + if (qp->next != NULL) + qp->next->prev = qp->prev; + } + + /* Release all fragment data. */ + + fp = qp->fragments; + while (fp != NULL) + { + xp = fp->next; + IS_SKB(fp->skb); + kfree_skb(fp->skb,FREE_READ); + kfree_s(fp, sizeof(struct ipfrag)); + fp = xp; + } + + /* Release the MAC header. */ + kfree_s(qp->mac, qp->maclen); + + /* Release the IP header. */ + kfree_s(qp->iph, qp->ihlen + 8); + + /* Finally, release the queue descriptor itself. */ + kfree_s(qp, sizeof(struct ipq)); + sti(); +} + + +/* + * Oops- a fragment queue timed out. Kill it and send an ICMP reply. + */ + +static void ip_expire(unsigned long arg) +{ + struct ipq *qp; + + qp = (struct ipq *)arg; + + /* + * Send an ICMP "Fragment Reassembly Timeout" message. + */ + + ip_statistics.IpReasmTimeout++; + ip_statistics.IpReasmFails++; + /* This if is always true... 
shrug */ + if(qp->fragments!=NULL) + icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0, qp->dev); + + /* + * Nuke the fragment queue. + */ + ip_free(qp); +} + + +/* + * Add an entry to the 'ipq' queue for a newly received IP datagram. + * We will (hopefully :-) receive all other fragments of this datagram + * in time, so we just create a queue for this datagram, in which we + * will insert the received fragments at their respective positions. + * + * The entry keeps private copies of the MAC header and of the IP header + * plus the 8 octets that follow it. Returns the new entry, or NULL if + * any allocation fails (whatever was allocated so far is released). + */ + +static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev) +{ + struct ipq *qp; + int maclen; + int ihlen; + + qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC); + if (qp == NULL) + { + printk("IP: create: no memory left !\n"); + return(NULL); + } + memset(qp, 0, sizeof(struct ipq)); + + /* + * Allocate memory for the MAC header. + * + * FIXME: We have a maximum MAC address size limit and define + * elsewhere. We should use it here and avoid the 3 kmalloc() calls + */ + + /* The MAC header is whatever lies between the buffer start and the IP header. */ + maclen = ((unsigned long) iph) - ((unsigned long) skb->data); + qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC); + if (qp->mac == NULL) + { + printk("IP: create: no memory left !\n"); + kfree_s(qp, sizeof(struct ipq)); + return(NULL); + } + + /* + * Allocate memory for the IP header (plus 8 octets for ICMP). + */ + + ihlen = (iph->ihl * sizeof(unsigned long)); + qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC); + if (qp->iph == NULL) + { + printk("IP: create: no memory left !\n"); + kfree_s(qp->mac, maclen); + kfree_s(qp, sizeof(struct ipq)); + return(NULL); + } + + /* Fill in the structure. */ + memcpy(qp->mac, skb->data, maclen); + memcpy(qp->iph, iph, ihlen + 8); + qp->len = 0; /* stays 0 until the final fragment tells us the total size */ + qp->ihlen = ihlen; + qp->maclen = maclen; + qp->fragments = NULL; + qp->dev = dev; + + /* Start a timer for this entry. 
*/ + qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ + qp->timer.data = (unsigned long) qp; /* pointer to queue */ + qp->timer.function = ip_expire; /* expire function */ + add_timer(&qp->timer); + + /* Add this entry to the queue. */ + qp->prev = NULL; + cli(); + qp->next = ipqueue; + if (qp->next != NULL) + qp->next->prev = qp; + ipqueue = qp; + sti(); + return(qp); +} + + +/* + * See if a fragment queue is complete. + */ + +static int ip_done(struct ipq *qp) +{ + struct ipfrag *fp; + int offset; + + /* Only possible if we received the final fragment. */ + if (qp->len == 0) + return(0); + + /* Check all fragment offsets to see if they connect. */ + fp = qp->fragments; + offset = 0; + while (fp != NULL) + { + if (fp->offset > offset) + return(0); /* fragment(s) missing */ + offset = fp->end; + fp = fp->next; + } + + /* All fragments are present. */ + return(1); +} + + +/* + * Build a new IP datagram from all its fragments. + * + * FIXME: We copy here because we lack an effective way of handling lists + * of bits on input. Until the new skb data handling is in I'm not going + * to touch this with a bargepole. This also causes a 4Kish limit on + * packet sizes. + */ + +static struct sk_buff *ip_glue(struct ipq *qp) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct ipfrag *fp; + unsigned char *ptr; + int count, len; + + /* + * Allocate a new buffer for the datagram. + */ + + len = qp->maclen + qp->ihlen + qp->len; + + if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL) + { + ip_statistics.IpReasmFails++; + printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp); + ip_free(qp); + return(NULL); + } + + /* Fill in the basic details. */ + skb->len = (len - qp->maclen); + skb->h.raw = skb->data; + skb->free = 1; + + /* Copy the original MAC and IP headers into the new buffer. 
*/ + ptr = (unsigned char *) skb->h.raw; + memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen); + ptr += qp->maclen; + memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen); + ptr += qp->ihlen; + skb->h.raw += qp->maclen; + + count = 0; + + /* Copy the data portions of all fragments into the new buffer. */ + fp = qp->fragments; + while(fp != NULL) + { + if(count+fp->len > skb->len) + { + printk("Invalid fragment list: Fragment over size.\n"); + ip_free(qp); + kfree_skb(skb,FREE_WRITE); + ip_statistics.IpReasmFails++; + return NULL; + } + memcpy((ptr + fp->offset), fp->ptr, fp->len); + count += fp->len; + fp = fp->next; + } + + /* We glued together all fragments, so remove the queue entry. */ + ip_free(qp); + + /* Done with all fragments. Fixup the new IP header. */ + iph = skb->h.iph; + iph->frag_off = 0; + iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count); + skb->ip_hdr = iph; + + ip_statistics.IpReasmOKs++; + return(skb); +} + + +/* + * Process an incoming IP datagram fragment. + */ + +static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev) +{ + struct ipfrag *prev, *next; + struct ipfrag *tfp; + struct ipq *qp; + struct sk_buff *skb2; + unsigned char *ptr; + int flags, offset; + int i, ihl, end; + + ip_statistics.IpReasmReqds++; + + /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ + qp = ip_find(iph); + + /* Is this a non-fragmented datagram? */ + offset = ntohs(iph->frag_off); + flags = offset & ~IP_OFFSET; + offset &= IP_OFFSET; + if (((flags & IP_MF) == 0) && (offset == 0)) + { + if (qp != NULL) + ip_free(qp); /* Huh? How could this exist?? */ + return(skb); + } + + offset <<= 3; /* offset is in 8-byte chunks */ + + /* + * If the queue already existed, keep restarting its timer as long + * as we still are receiving fragments. Otherwise, create a fresh + * queue entry. 
+ */ + + if (qp != NULL) + { + del_timer(&qp->timer); + qp->timer.expires = IP_FRAG_TIME; /* about 30 seconds */ + qp->timer.data = (unsigned long) qp; /* pointer to queue */ + qp->timer.function = ip_expire; /* expire function */ + add_timer(&qp->timer); + } + else + { + /* + * If we failed to create it, then discard the frame + */ + if ((qp = ip_create(skb, iph, dev)) == NULL) + { + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + ip_statistics.IpReasmFails++; + return NULL; + } + } + + /* + * Determine the position of this fragment. + */ + + ihl = (iph->ihl * sizeof(unsigned long)); + end = offset + ntohs(iph->tot_len) - ihl; + + /* + * Point into the IP datagram 'data' part. + */ + + ptr = skb->data + dev->hard_header_len + ihl; + + /* + * Is this the final fragment? + */ + + if ((flags & IP_MF) == 0) + qp->len = end; + + /* + * Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + + prev = NULL; + for(next = qp->fragments; next != NULL; next = next->next) + { + if (next->offset > offset) + break; /* bingo! */ + prev = next; + } + + /* + * We found where to put this one. + * Check for overlap with preceding fragment, and, if needed, + * align things so that any overlaps are eliminated. + */ + if (prev != NULL && offset < prev->end) + { + i = prev->end - offset; + offset += i; /* ptr into datagram */ + ptr += i; /* ptr into fragment data */ + } + + /* + * Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + + for(; next != NULL; next = tfp) + { + tfp = next->next; + if (next->offset >= end) + break; /* no overlaps at all */ + + i = end - next->offset; /* overlap is 'i' bytes */ + next->len -= i; /* so reduce size of */ + next->offset += i; /* next fragment */ + next->ptr += i; + + /* + * If we get a frag size of <= 0, remove it and the packet + * that it goes with. 
+ */ + if (next->len <= 0) + { + if (next->prev != NULL) + next->prev->next = next->next; + else + qp->fragments = next->next; + + if (tfp->next != NULL) + next->next->prev = next->prev; + + kfree_skb(next->skb,FREE_READ); + kfree_s(next, sizeof(struct ipfrag)); + } + } + + /* + * Insert this fragment in the chain of fragments. + */ + + tfp = NULL; + tfp = ip_frag_create(offset, end, skb, ptr); + + /* + * No memory to save the fragment - so throw the lot + */ + + if (!tfp) + { + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return NULL; + } + tfp->prev = prev; + tfp->next = next; + if (prev != NULL) + prev->next = tfp; + else + qp->fragments = tfp; + + if (next != NULL) + next->prev = tfp; + + /* + * OK, so we inserted this new fragment into the chain. + * Check if we now have a full IP datagram which we can + * bump up to the IP layer... + */ + + if (ip_done(qp)) + { + skb2 = ip_glue(qp); /* glue together the fragments */ + return(skb2); + } + return(NULL); +} + + +/* + * This IP datagram is too large to be sent in one piece. Break it up into + * smaller pieces (each of size equal to the MAC header plus IP header plus + * a block of the data of the original IP data part) that will yet fit in a + * single device frame, and queue such a frame for sending by calling the + * ip_queue_xmit(). Note that this is recursion, and bad things will happen + * if this function causes a loop... + * + * Yes this is inefficient, feel free to submit a quicker one. + * + * **Protocol Violation** + * We copy all the options to each fragment. !FIXME! + */ +void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag) +{ + struct iphdr *iph; + unsigned char *raw; + unsigned char *ptr; + struct sk_buff *skb2; + int left, mtu, hlen, len; + int offset; + unsigned long flags; + + /* + * Point into the IP datagram header. + */ + + raw = skb->data; + iph = (struct iphdr *) (raw + dev->hard_header_len); + + skb->ip_hdr = iph; + + /* + * Setup starting values. 
+ */ + + hlen = (iph->ihl * sizeof(unsigned long)); + left = ntohs(iph->tot_len) - hlen; /* Space per frame */ + hlen += dev->hard_header_len; /* Total header size */ + mtu = (dev->mtu - hlen); /* Size of data space */ + ptr = (raw + hlen); /* Where to start from */ + + /* + * Check for any "DF" flag. [DF means do not fragment] + */ + + if (ntohs(iph->frag_off) & IP_DF) + { + /* + * Reply giving the MTU of the failed hop. + */ + ip_statistics.IpFragFails++; + icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev->mtu, dev); + return; + } + + /* + * The protocol doesn't seem to say what to do in the case that the + * frame + options doesn't fit the mtu. As it used to fall down dead + * in this case we were fortunate it didn't happen + */ + + if(mtu<8) + { + /* It's wrong but it's better than nothing */ + icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev); + ip_statistics.IpFragFails++; + return; + } + + /* + * Fragment the datagram. + */ + + /* + * The initial offset is 0 for a complete frame. When + * fragmenting fragments it's wherever this one starts. + */ + + if (is_frag & 2) + offset = (ntohs(iph->frag_off) & 0x1fff) << 3; + else + offset = 0; + + + /* + * Keep copying data until we run out. + */ + + while(left > 0) + { + len = left; + /* IF: it doesn't fit, use 'mtu' - the data space left */ + if (len > mtu) + len = mtu; + /* IF: we are not sending upto and including the packet end + then align the next start on an eight byte boundary */ + if (len < left) + { + len/=8; + len*=8; + } + /* + * Allocate buffer. 
+ */ + + if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL) + { + printk("IP: frag: no memory for new fragment!\n"); + ip_statistics.IpFragFails++; + return; + } + + /* + * Set up data on packet + */ + + skb2->arp = skb->arp; + if(skb->free==0) + printk("IP fragmenter: BUG free!=1 in fragmenter\n"); + skb2->free = 1; + skb2->len = len + hlen; + skb2->h.raw=(char *) skb2->data; + /* + * Charge the memory for the fragment to any owner + * it might possess + */ + + save_flags(flags); + if (sk) + { + cli(); + sk->wmem_alloc += skb2->mem_len; + skb2->sk=sk; + } + restore_flags(flags); + skb2->raddr = skb->raddr; /* For rebuild_header - must be here */ + + /* + * Copy the packet header into the new buffer. + */ + + memcpy(skb2->h.raw, raw, hlen); + + /* + * Copy a block of the IP datagram. + */ + memcpy(skb2->h.raw + hlen, ptr, len); + left -= len; + + skb2->h.raw+=dev->hard_header_len; + + /* + * Fill in the new header fields. + */ + iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/); + iph->frag_off = htons((offset >> 3)); + /* + * Added AC : If we are fragmenting a fragment thats not the + * last fragment then keep MF on each bit + */ + if (left > 0 || (is_frag & 1)) + iph->frag_off |= htons(IP_MF); + ptr += len; + offset += len; + + /* + * Put this fragment into the sending queue. + */ + + ip_statistics.IpFragCreates++; + + ip_queue_xmit(sk, dev, skb2, 2); + } + ip_statistics.IpFragOKs++; +} + + + +#ifdef CONFIG_IP_FORWARD + +/* + * Forward an IP datagram to its next destination. + */ + +static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag) +{ + struct device *dev2; /* Output device */ + struct iphdr *iph; /* Our header */ + struct sk_buff *skb2; /* Output packet */ + struct rtable *rt; /* Route we use */ + unsigned char *ptr; /* Data pointer */ + unsigned long raddr; /* Router IP address */ + + /* + * See if we are allowed to forward this. 
+ */ + +#ifdef CONFIG_IP_FIREWALL + int err; + + if((err=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0))!=1) + { + if(err==-1) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev); + return; + } +#endif + /* + * According to the RFC, we must first decrease the TTL field. If + * that reaches zero, we must reply an ICMP control message telling + * that the packet's lifetime expired. + * + * Exception: + * We may not generate an ICMP for an ICMP. icmp_send does the + * enforcement of this so we can forget it here. It is however + * sometimes VERY important. + */ + + iph = skb->h.iph; + iph->ttl--; + if (iph->ttl <= 0) + { + /* Tell the sender its packet died... */ + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev); + return; + } + + /* + * Re-compute the IP header checksum. + * This is inefficient. We know what has happened to the header + * and could thus adjust the checksum as Phil Karn does in KA9Q + */ + + ip_send_check(iph); + + /* + * OK, the packet is still valid. Fetch its destination address, + * and give it to the IP sender for further processing. + */ + + rt = ip_rt_route(iph->daddr, NULL, NULL); + if (rt == NULL) + { + /* + * Tell the sender its packet cannot be delivered. Again + * ICMP is screened later. + */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev); + return; + } + + + /* + * Gosh. Not only is the packet valid; we even know how to + * forward it onto its final destination. Can we say this + * is being plain lucky? + * If the router told us that there is no GW, use the dest. + * IP address itself- we seem to be connected directly... + */ + + raddr = rt->rt_gateway; + + if (raddr != 0) + { + /* + * There is a gateway so find the correct route for it. + * Gateways cannot in turn be gatewayed. + */ + rt = ip_rt_route(raddr, NULL, NULL); + if (rt == NULL) + { + /* + * Tell the sender its packet cannot be delivered... 
+ */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev); + return; + } + if (rt->rt_gateway != 0) + raddr = rt->rt_gateway; + } + else + raddr = iph->daddr; + + /* + * Having picked a route we can now send the frame out. + */ + + dev2 = rt->rt_dev; + + /* + * In IP you never have to forward a frame on the interface that it + * arrived upon. We now generate an ICMP HOST REDIRECT giving the route + * we calculated. + */ +#ifdef CONFIG_IP_NO_ICMP_REDIRECT + if (dev == dev2) + return; +#else + if (dev == dev2 && (iph->saddr&dev->pa_mask) == (iph->daddr & dev->pa_mask)) + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev); +#endif + + /* + * We now allocate a new buffer, and copy the datagram into it. + * If the indicated interface is up and running, kick it. + */ + + if (dev2->flags & IFF_UP) + { + + /* + * Current design decrees we copy the packet. For identical header + * lengths we could avoid it. The new skb code will let us push + * data so the problem goes away then. + */ + + skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC); + /* + * This is rare and since IP is tolerant of network failures + * quite harmless. + */ + if (skb2 == NULL) + { + printk("\nIP: No memory available for IP forward\n"); + return; + } + ptr = skb2->data; + skb2->free = 1; + skb2->len = skb->len + dev2->hard_header_len; + skb2->h.raw = ptr; + + /* + * Copy the packet data into the new buffer. + */ + memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len); + + /* Now build the MAC header. */ + (void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr); + + ip_statistics.IpForwDatagrams++; + + /* + * See if it needs fragmenting. Note in ip_rcv we tagged + * the fragment type. This must be right so that + * the fragmenter does the right thing. 
+ */ + + if(skb2->len > dev2->mtu + dev2->hard_header_len) + { + ip_fragment(NULL,skb2,dev2, is_frag); + kfree_skb(skb2,FREE_WRITE); + } + else + { +#ifdef CONFIG_IP_ACCT + /* + * Count mapping we shortcut + */ + + ip_acct_cnt(iph,dev,ip_acct_chain); +#endif + + /* + * Map service types to priority. We lie about + * throughput being low priority, but it's a good + * choice to help improve general usage. + */ + if(iph->tos & IPTOS_LOWDELAY) + dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE); + else if(iph->tos & IPTOS_THROUGHPUT) + dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND); + else + dev_queue_xmit(skb2, dev2, SOPRI_NORMAL); + } + } +} + + +#endif + +/* + * This function receives all incoming IP datagrams. + */ + +int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct iphdr *iph = skb->h.iph; + struct sock *raw_sk=NULL; + unsigned char hash; + unsigned char flag = 0; + unsigned char opts_p = 0; /* Set iff the packet has options. */ + struct inet_protocol *ipprot; + static struct options opt; /* since we don't use these yet, and they + take up stack space. */ + int brd=IS_MYADDR; + int is_frag=0; +#ifdef CONFIG_IP_FIREWALL + int err; +#endif + + ip_statistics.IpInReceives++; + + /* + * Tag the ip header of this packet so we can find it + */ + + skb->ip_hdr = iph; + + /* + * Is the datagram acceptable? + * + * 1. Length at least the size of an ip header + * 2. Version of 4 + * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] + * (4. We ought to check for IP multicast addresses and undefined types.. does this matter ?) + */ + + if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) + { + ip_statistics.IpInHdrErrors++; + kfree_skb(skb, FREE_WRITE); + return(0); + } + + /* + * See if the firewall wants to dispose of the packet. 
+ */ + +#ifdef CONFIG_IP_FIREWALL + + if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))!=1) + { + if(err==-1) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); + kfree_skb(skb, FREE_WRITE); + return 0; + } + +#endif + + /* + * Our transport medium may have padded the buffer out. Now we know it + * is IP we can trim to the true length of the frame. + */ + + skb->len=ntohs(iph->tot_len); + + /* + * Next analyse the packet for options. Studies show under one packet in + * a thousand have options.... + */ + + if (iph->ihl != 5) + { /* Fast path for the typical optionless IP packet. */ + memset((char *) &opt, 0, sizeof(opt)); + if (do_options(iph, &opt) != 0) + return 0; + opts_p = 1; + } + + /* + * Remember if the frame is fragmented. + */ + + if(iph->frag_off) + { + if (iph->frag_off & 0x0020) + is_frag|=1; + /* + * Last fragment ? + */ + + if (ntohs(iph->frag_off) & 0x1fff) + is_frag|=2; + } + + /* + * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. + * + * This is inefficient. While finding out if it is for us we could also compute + * the routing table entry. This is where the great unified cache theory comes + * in as and when someone implements it + * + * For most hosts over 99% of packets match the first conditional + * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at + * function entry. + */ + + if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) + { + /* + * Don't forward multicast or broadcast frames. + */ + + if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) + { + kfree_skb(skb,FREE_WRITE); + return 0; + } + + /* + * The packet is for another target. 
Forward the frame + */ + +#ifdef CONFIG_IP_FORWARD + ip_forward(skb, dev, is_frag); +#else +/* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", + iph->saddr,iph->daddr);*/ + ip_statistics.IpInAddrErrors++; +#endif + /* + * The forwarder is inefficient and copies the packet. We + * free the original now. + */ + + kfree_skb(skb, FREE_WRITE); + return(0); + } + +#ifdef CONFIG_IP_MULTICAST + + if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) + { + /* + * Check it is for one of our groups + */ + struct ip_mc_list *ip_mc=dev->ip_mc_list; + do + { + if(ip_mc==NULL) + { + kfree_skb(skb, FREE_WRITE); + return 0; + } + if(ip_mc->multiaddr==iph->daddr) + break; + ip_mc=ip_mc->next; + } + while(1); + } +#endif + /* + * Account for the packet + */ + +#ifdef CONFIG_IP_ACCT + ip_acct_cnt(iph,dev, ip_acct_chain); +#endif + + /* + * Reassemble IP fragments. + */ + + if(is_frag) + { + /* Defragment. Obtain the complete packet if there is one */ + skb=ip_defrag(iph,skb,dev); + if(skb==NULL) + return 0; + skb->dev = dev; + iph=skb->h.iph; + } + + + + /* + * Point into the IP datagram, just past the header. + */ + + skb->ip_hdr = iph; + skb->h.raw += iph->ihl*4; + + /* + * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. 
+ */ + + hash = iph->protocol & (SOCK_ARRAY_SIZE-1); + + /* If there maybe a raw socket we must check - if not we don't care less */ + if((raw_sk=raw_prot.sock_array[hash])!=NULL) + { + struct sock *sknext=NULL; + struct sk_buff *skb1; + raw_sk=get_sock_raw(raw_sk, hash, iph->saddr, iph->daddr); + if(raw_sk) /* Any raw sockets */ + { + do + { + /* Find the next */ + sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr); + if(sknext) + skb1=skb_clone(skb, GFP_ATOMIC); + else + break; /* One pending raw socket left */ + if(skb1) + raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr); + raw_sk=sknext; + } + while(raw_sk!=NULL); + /* Here either raw_sk is the last raw socket, or NULL if none */ + /* We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy */ + } + } + + /* + * skb->h.raw now points at the protocol beyond the IP header. + */ + + hash = iph->protocol & (MAX_INET_PROTOS -1); + for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) + { + struct sk_buff *skb2; + + if (ipprot->protocol != iph->protocol) + continue; + /* + * See if we need to make a copy of it. This will + * only be set if more than one protocol wants it. + * and then not for the last one. If there is a pending + * raw delivery wait for that + */ + if (ipprot->copy || raw_sk) + { + skb2 = skb_clone(skb, GFP_ATOMIC); + if(skb2==NULL) + continue; + } + else + { + skb2 = skb; + } + flag = 1; + + /* + * Pass on the datagram to each protocol that wants it, + * based on the datagram protocol. We should really + * check the protocol handler's return values here... + */ + ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr, + (ntohs(iph->tot_len) - (iph->ihl * 4)), + iph->saddr, 0, ipprot); + + } + + /* + * All protocols checked. + * If this packet was a broadcast, we may *not* reply to it, since that + * causes (proven, grin) ARP storms and a leakage of memory (i.e. 
all + * ICMP reply messages get queued up for transmission...) + */ + + if(raw_sk!=NULL) /* Shift to last raw user */ + raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr); + else if (!flag) /* Free and report errors */ + { + if (brd != IS_BROADCAST && brd!=IS_MULTICAST) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); + kfree_skb(skb, FREE_WRITE); + } + + return(0); +} + +/* + * Loop a packet back to the sender. + */ + +static void ip_loopback(struct device *old_dev, struct sk_buff *skb) +{ + extern struct device loopback_dev; + struct device *dev=&loopback_dev; + int len=skb->len-old_dev->hard_header_len; + struct sk_buff *newskb=alloc_skb(len+dev->hard_header_len, GFP_ATOMIC); + + if(newskb==NULL) + return; + + newskb->link3=NULL; + newskb->sk=NULL; + newskb->dev=dev; + newskb->saddr=skb->saddr; + newskb->daddr=skb->daddr; + newskb->raddr=skb->raddr; + newskb->free=1; + newskb->lock=0; + newskb->users=0; + newskb->pkt_type=skb->pkt_type; + newskb->len=len+dev->hard_header_len; + + + newskb->ip_hdr=(struct iphdr *)(newskb->data+ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr)); + memcpy(newskb->ip_hdr,skb->ip_hdr,len); + + /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */ + + /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/ + ip_queue_xmit(NULL, dev, newskb, 1); +} + + +/* + * Queues a packet to be sent, and starts the transmitter + * if necessary. if free = 1 then we free the block after + * transmit, otherwise we don't. If free==2 we not only + * free the block but also don't assign a new ip seq number. 
+ * This routine also needs to put in the total length, + * and compute the checksum + */ + +void ip_queue_xmit(struct sock *sk, struct device *dev, + struct sk_buff *skb, int free) +{ + struct iphdr *iph; + unsigned char *ptr; + + /* Sanity check */ + if (dev == NULL) + { + printk("IP: ip_queue_xmit dev = NULL\n"); + return; + } + + IS_SKB(skb); + + /* + * Do some book-keeping in the packet for later + */ + + + skb->dev = dev; + skb->when = jiffies; + + /* + * Find the IP header and set the length. This is bad + * but once we get the skb data handling code in the + * hardware will push its header sensibly and we will + * set skb->ip_hdr to avoid this mess and the fixed + * header length problem + */ + + ptr = skb->data; + ptr += dev->hard_header_len; + iph = (struct iphdr *)ptr; + skb->ip_hdr = iph; + iph->tot_len = ntohs(skb->len-dev->hard_header_len); + +#ifdef CONFIG_IP_FIREWALL + if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) != 1) + /* just don't send this packet */ + return; +#endif + + /* + * No reassigning numbers to fragments... + */ + + if(free!=2) + iph->id = htons(ip_id_count++); + else + free=1; + + /* All buffers without an owner socket get freed */ + if (sk == NULL) + free = 1; + + skb->free = free; + + /* + * Do we need to fragment. Again this is inefficient. + * We need to somehow lock the original buffer and use + * bits of it. + */ + + if(skb->len > dev->mtu + dev->hard_header_len) + { + ip_fragment(sk,skb,dev,0); + IS_SKB(skb); + kfree_skb(skb,FREE_WRITE); + return; + } + + /* + * Add an IP checksum + */ + + ip_send_check(iph); + + /* + * Print the frame when debugging + */ + + /* + * More debugging. You cannot queue a packet already on a list + * Spot this and moan loudly. + */ + if (skb->next != NULL) + { + printk("ip_queue_xmit: next != NULL\n"); + skb_unlink(skb); + } + + /* + * If a sender wishes the packet to remain unfreed + * we add it to his send queue. This arguably belongs + * in the TCP level since nobody else uses it. 
BUT + * remember IPng might change all the rules. + */ + + if (!free) + { + unsigned long flags; + /* The socket now has more outstanding blocks */ + + sk->packets_out++; + + /* Protect the list for a moment */ + save_flags(flags); + cli(); + + if (skb->link3 != NULL) + { + printk("ip.c: link3 != NULL\n"); + skb->link3 = NULL; + } + if (sk->send_head == NULL) + { + sk->send_tail = skb; + sk->send_head = skb; + } + else + { + sk->send_tail->link3 = skb; + sk->send_tail = skb; + } + /* skb->link3 is NULL */ + + /* Interrupt restore */ + restore_flags(flags); + } + else + /* Remember who owns the buffer */ + skb->sk = sk; + + /* + * If the indicated interface is up and running, send the packet. + */ + + ip_statistics.IpOutRequests++; +#ifdef CONFIG_IP_ACCT + ip_acct_cnt(iph,dev, ip_acct_chain); +#endif + +#ifdef CONFIG_IP_MULTICAST + + /* + * Multicasts are looped back for other local users + */ + + if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK)) + { + if(sk==NULL || sk->ip_mc_loop) + { + if(iph->daddr==IGMP_ALL_HOSTS) + ip_loopback(dev,skb); + else + { + struct ip_mc_list *imc=dev->ip_mc_list; + while(imc!=NULL) + { + if(imc->multiaddr==iph->daddr) + { + ip_loopback(dev,skb); + break; + } + imc=imc->next; + } + } + } + /* Multicasts with ttl 0 must not go beyond the host */ + + if(skb->ip_hdr->ttl==0) + { + kfree_skb(skb, FREE_READ); + return; + } + } +#endif + if((dev->flags&IFF_BROADCAST) && iph->daddr==dev->pa_brdaddr && !(dev->flags&IFF_LOOPBACK)) + ip_loopback(dev,skb); + + if (dev->flags & IFF_UP) + { + /* + * If we have an owner use its priority setting, + * otherwise use NORMAL + */ + + if (sk != NULL) + { + dev_queue_xmit(skb, dev, sk->priority); + } + else + { + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + } + } + else + { + ip_statistics.IpOutDiscards++; + if (free) + kfree_skb(skb, FREE_WRITE); + } +} + + + +#ifdef CONFIG_IP_MULTICAST + +/* + * Write an multicast group list table for the IGMP daemon to + * read. 
+ */ + +int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length) +{ + off_t pos=0, begin=0; + struct ip_mc_list *im; + unsigned long flags; + int len=0; + struct device *dev; + + len=sprintf(buffer,"Device : Count\tGroup Users Timer\n"); + save_flags(flags); + cli(); + + for(dev = dev_base; dev; dev = dev->next) + { + if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)) + { + len+=sprintf(buffer+len,"%-10s: %5d\n", + dev->name, dev->mc_count); + for(im = dev->ip_mc_list; im; im = im->next) + { + len+=sprintf(buffer+len, + "\t\t\t%08lX %5d %d:%08lX\n", + im->multiaddr, im->users, + im->tm_running, im->timer.expires); + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + } + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + + +#endif +/* + * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on + * an IP socket. + * + * We implement IP_TOS (type of service), IP_TTL (time to live). + * + * Next release we will sort out IP_OPTIONS since for some people are kind of important. 
+ */ + +int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) +{ + int val,err; +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + struct ip_fw tmp_fw; +#endif + if (optval == NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ, optval, sizeof(int)); + if(err) + return err; + + val = get_fs_long((unsigned long *)optval); + + if(level!=SOL_IP) + return -EOPNOTSUPP; + + switch(optname) + { + case IP_TOS: + if(val<0||val>255) + return -EINVAL; + sk->ip_tos=val; + if(val==IPTOS_LOWDELAY) + sk->priority=SOPRI_INTERACTIVE; + if(val==IPTOS_THROUGHPUT) + sk->priority=SOPRI_BACKGROUND; + return 0; + case IP_TTL: + if(val<1||val>255) + return -EINVAL; + sk->ip_ttl=val; + return 0; +#ifdef CONFIG_IP_MULTICAST + case IP_MULTICAST_TTL: + { + unsigned char ucval; + + ucval=get_fs_byte((unsigned char *)optval); + if(ucval<1||ucval>255) + return -EINVAL; + sk->ip_mc_ttl=(int)ucval; + return 0; + } + case IP_MULTICAST_LOOP: + { + unsigned char ucval; + + ucval=get_fs_byte((unsigned char *)optval); + if(ucval!=0 && ucval!=1) + return -EINVAL; + sk->ip_mc_loop=(int)ucval; + return 0; + } + case IP_MULTICAST_IF: + { + /* Not fully tested */ + struct in_addr addr; + struct device *dev=NULL; + + /* + * Check the arguments are allowable + */ + + err=verify_area(VERIFY_READ, optval, sizeof(addr)); + if(err) + return err; + + memcpy_fromfs(&addr,optval,sizeof(addr)); + + printk("MC bind %s\n", in_ntoa(addr.s_addr)); + + /* + * What address has been requested + */ + + if(addr.s_addr==INADDR_ANY) /* Default */ + { + sk->ip_mc_name[0]=0; + return 0; + } + + /* + * Find the device + */ + + for(dev = dev_base; dev; dev = dev->next) + { + if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&& + (dev->pa_addr==addr.s_addr)) + break; + } + + /* + * Did we find one + */ + + if(dev) + { + strcpy(sk->ip_mc_name,dev->name); + return 0; + } + return -EADDRNOTAVAIL; + } + + case IP_ADD_MEMBERSHIP: + { + +/* + * FIXME: Add/Del membership should have a 
semaphore protecting them from re-entry + */ + struct ip_mreq mreq; + static struct options optmem; + unsigned long route_src; + struct rtable *rt; + struct device *dev=NULL; + + /* + * Check the arguments. + */ + + err=verify_area(VERIFY_READ, optval, sizeof(mreq)); + if(err) + return err; + + memcpy_fromfs(&mreq,optval,sizeof(mreq)); + + /* + * Get device for use later + */ + + if(mreq.imr_interface.s_addr==INADDR_ANY) + { + /* + * Not set so scan. + */ + if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL) + { + dev=rt->rt_dev; + rt->rt_use--; + } + } + else + { + /* + * Find a suitable device. + */ + for(dev = dev_base; dev; dev = dev->next) + { + if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&& + (dev->pa_addr==mreq.imr_interface.s_addr)) + break; + } + } + + /* + * No device, no cookies. + */ + + if(!dev) + return -ENODEV; + + /* + * Join group. + */ + + return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr); + } + + case IP_DROP_MEMBERSHIP: + { + struct ip_mreq mreq; + struct rtable *rt; + static struct options optmem; + unsigned long route_src; + struct device *dev=NULL; + + /* + * Check the arguments + */ + + err=verify_area(VERIFY_READ, optval, sizeof(mreq)); + if(err) + return err; + + memcpy_fromfs(&mreq,optval,sizeof(mreq)); + + /* + * Get device for use later + */ + + if(mreq.imr_interface.s_addr==INADDR_ANY) + { + if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL) + { + dev=rt->rt_dev; + rt->rt_use--; + } + } + else + { + for(dev = dev_base; dev; dev = dev->next) + { + if((dev->flags&IFF_UP)&& (dev->flags&IFF_MULTICAST)&& + (dev->pa_addr==mreq.imr_interface.s_addr)) + break; + } + } + + /* + * Did we find a suitable device. 
+ */ + + if(!dev) + return -ENODEV; + + /* + * Leave group + */ + + return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr); + } +#endif +#ifdef CONFIG_IP_FIREWALL + case IP_FW_ADD_BLK: + case IP_FW_DEL_BLK: + case IP_FW_ADD_FWD: + case IP_FW_DEL_FWD: + case IP_FW_CHK_BLK: + case IP_FW_CHK_FWD: + case IP_FW_FLUSH_BLK: + case IP_FW_FLUSH_FWD: + case IP_FW_ZERO_BLK: + case IP_FW_ZERO_FWD: + case IP_FW_POLICY_BLK: + case IP_FW_POLICY_FWD: + if(!suser()) + return -EPERM; + if(optlen>sizeof(tmp_fw) || optlen<1) + return -EINVAL; + err=verify_area(VERIFY_READ,optval,optlen); + if(err) + return err; + memcpy_fromfs(&tmp_fw,optval,optlen); + err=ip_fw_ctl(optname, &tmp_fw,optlen); + return -err; /* -0 is 0 after all */ + +#endif +#ifdef CONFIG_IP_ACCT + case IP_ACCT_DEL: + case IP_ACCT_ADD: + case IP_ACCT_FLUSH: + case IP_ACCT_ZERO: + if(!suser()) + return -EPERM; + if(optlen>sizeof(tmp_fw) || optlen<1) + return -EINVAL; + err=verify_area(VERIFY_READ,optval,optlen); + if(err) + return err; + memcpy_fromfs(&tmp_fw, optval,optlen); + err=ip_acct_ctl(optname, &tmp_fw,optlen); + return -err; /* -0 is 0 after all */ +#endif + /* IP_OPTIONS and friends go here eventually */ + default: + return(-ENOPROTOOPT); + } +} + +/* + * Get the options. Note for future reference. The GET of IP options gets the + * _received_ ones. The set sets the _sent_ ones. 
+ */ + +int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) +{ + int val,err; +#ifdef CONFIG_IP_MULTICAST + int len; +#endif + + if(level!=SOL_IP) + return -EOPNOTSUPP; + + switch(optname) + { + case IP_TOS: + val=sk->ip_tos; + break; + case IP_TTL: + val=sk->ip_ttl; + break; +#ifdef CONFIG_IP_MULTICAST + case IP_MULTICAST_TTL: + val=sk->ip_mc_ttl; + break; + case IP_MULTICAST_LOOP: + val=sk->ip_mc_loop; + break; + case IP_MULTICAST_IF: + err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); + if(err) + return err; + len=strlen(sk->ip_mc_name); + err=verify_area(VERIFY_WRITE, optval, len); + if(err) + return err; + put_fs_long(len,(unsigned long *) optlen); + memcpy_tofs((void *)optval,sk->ip_mc_name, len); + return 0; +#endif + default: + return(-ENOPROTOOPT); + } + err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *) optlen); + + err=verify_area(VERIFY_WRITE, optval, sizeof(int)); + if(err) + return err; + put_fs_long(val,(unsigned long *)optval); + + return(0); +} + +/* + * IP protocol layer initialiser + */ + +static struct packet_type ip_packet_type = +{ + 0, /* MUTTER ntohs(ETH_P_IP),*/ + NULL, /* All devices */ + ip_rcv, + NULL, + NULL, +}; + +/* + * Device notifier + */ + +static int ip_rt_event(unsigned long event, void *ptr) +{ + if(event==NETDEV_DOWN) + ip_rt_flush(ptr); + return NOTIFY_DONE; +} + +struct notifier_block ip_rt_notifier={ + ip_rt_event, + NULL, + 0 +}; + +/* + * IP registers the packet type and then calls the subprotocol initialisers + */ + +void ip_init(void) +{ + ip_packet_type.type=htons(ETH_P_IP); + dev_add_pack(&ip_packet_type); + + /* So we flush routes when a device is downed */ + register_netdevice_notifier(&ip_rt_notifier); +/* ip_raw_init(); + ip_packet_init(); + ip_tcp_init(); + ip_udp_init();*/ +} diff --git a/pfinet/linux-inet/ip.h b/pfinet/linux-inet/ip.h new file mode 100644 index 00000000..95954a8c --- /dev/null +++ 
b/pfinet/linux-inet/ip.h @@ -0,0 +1,130 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the IP module. + * + * Version: @(#)ip.h 1.0.2 05/07/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _IP_H +#define _IP_H + + +#include <linux/ip.h> +#include <linux/config.h> + +#ifndef _SNMP_H +#include "snmp.h" +#endif + +#include "sock.h" /* struct sock */ + +/* IP flags. */ +#define IP_CE 0x8000 /* Flag: "Congestion" */ +#define IP_DF 0x4000 /* Flag: "Don't Fragment" */ +#define IP_MF 0x2000 /* Flag: "More Fragments" */ +#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ + +#define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ + +#ifdef CONFIG_IP_MULTICAST +extern void ip_mc_dropsocket(struct sock *); +extern void ip_mc_dropdevice(struct device *dev); +extern int ip_mc_procinfo(char *, char **, off_t, int); +#define MULTICAST(x) (IN_MULTICAST(htonl(x))) +#endif + + +/* Describe an IP fragment. */ +struct ipfrag { + int offset; /* offset of fragment in IP datagram */ + int end; /* last byte of data in datagram */ + int len; /* length of this fragment */ + struct sk_buff *skb; /* complete received fragment */ + unsigned char *ptr; /* pointer into real fragment data */ + struct ipfrag *next; /* linked list pointers */ + struct ipfrag *prev; +}; + +/* Describe an entry in the "incomplete datagrams" queue. 
*/ +struct ipq { + unsigned char *mac; /* pointer to MAC header */ + struct iphdr *iph; /* pointer to IP header */ + int len; /* total length of original datagram */ + short ihlen; /* length of the IP header */ + short maclen; /* length of the MAC header */ + struct timer_list timer; /* when will this queue expire? */ + struct ipfrag *fragments; /* linked list of received fragments */ + struct ipq *next; /* linked list pointers */ + struct ipq *prev; + struct device *dev; /* Device - for icmp replies */ +}; + + +extern int backoff(int n); + +extern void ip_print(const struct iphdr *ip); +extern int ip_ioctl(struct sock *sk, int cmd, + unsigned long arg); +extern void ip_route_check(unsigned long daddr); +extern int ip_build_header(struct sk_buff *skb, + unsigned long saddr, + unsigned long daddr, + struct device **dev, int type, + struct options *opt, int len, + int tos,int ttl); +extern unsigned short ip_compute_csum(unsigned char * buff, int len); +extern int ip_rcv(struct sk_buff *skb, struct device *dev, + struct packet_type *pt); +extern void ip_send_check(struct iphdr *ip); +extern int ip_id_count; +extern void ip_queue_xmit(struct sock *sk, + struct device *dev, struct sk_buff *skb, + int free); +extern int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen); +extern int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); +extern void ip_init(void); + +extern struct ip_mib ip_statistics; + +/* + * This is a version of ip_compute_csum() optimized for IP headers, which + * always checksum on 4 octet boundaries. 
+ * Used by ip.c and slhc.c (the net driver module) + * (Moved to here by bj0rn@blox.se) + */ + +static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen) +{ + unsigned long sum = 0; + + if (wlen) + { + unsigned long bogus; + __asm__("clc\n" + "1:\t" + "lodsl\n\t" + "adcl %3, %0\n\t" + "decl %2\n\t" + "jne 1b\n\t" + "adcl $0, %0\n\t" + "movl %0, %3\n\t" + "shrl $16, %3\n\t" + "addw %w3, %w0\n\t" + "adcw $0, %w0" + : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus) + : "0" (sum), "1" (buff), "2" (wlen)); + } + return (~sum) & 0xffff; +} +#endif /* _IP_H */ diff --git a/pfinet/linux-inet/ip_fw.c b/pfinet/linux-inet/ip_fw.c new file mode 100644 index 00000000..0572c8f1 --- /dev/null +++ b/pfinet/linux-inet/ip_fw.c @@ -0,0 +1,1016 @@ +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * -- Alan Cox. + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Zeroing /proc and other additions + * Jos Vos 4/Feb/1995. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan Cox 12/Feb/1995 + * Porting bidirectional entries from BSD, fixing accounting issues, + * adding struct ip_fwpkt for checking packets with interface address + * Jos Vos 5/Mar/1995. + * + * All the real work was done by ..... 
+ */ + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + */ + +#include <linux/config.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/config.h> + +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/icmp.h> +#include <linux/udp.h> +#include "ip.h" +#include "protocol.h" +#include "route.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "icmp.h" +#include <linux/ip_fw.h> + +/* + * Implement IP packet firewall + */ + +#ifdef CONFIG_IPFIREWALL_DEBUG +#define dprintf1(a) printk(a) +#define dprintf2(a1,a2) printk(a1,a2) +#define dprintf3(a1,a2,a3) printk(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) printk(a1,a2,a3,a4) +#else +#define dprintf1(a) +#define dprintf2(a1,a2) +#define dprintf3(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) +#endif + +#define print_ip(a) printf("%d.%d.%d.%d",(ntohl(a.s_addr)>>24)&0xFF,\ + (ntohl(a.s_addr)>>16)&0xFF,\ + (ntohl(a.s_addr)>>8)&0xFF,\ + (ntohl(a.s_addr))&0xFF); + +#ifdef IPFIREWALL_DEBUG +#define dprint_ip(a) print_ip(a) +#else +#define dprint_ip(a) +#endif + +#ifdef CONFIG_IP_FIREWALL +struct ip_fw *ip_fw_fwd_chain; +struct ip_fw *ip_fw_blk_chain; +int ip_fw_blk_policy=IP_FW_F_ACCEPT; +int ip_fw_fwd_policy=IP_FW_F_ACCEPT; +#endif +#ifdef CONFIG_IP_ACCT +struct ip_fw *ip_acct_chain; +#endif + +#define IP_INFO_BLK 0 +#define 
IP_INFO_FWD 1 +#define IP_INFO_ACCT 2 + + +/* + * Returns 1 if the port is matched by the vector, 0 otherwise + */ + +extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag) +{ + if (!nports) + return 1; + if ( range_flag ) + { + if ( portptr[0] <= port && port <= portptr[1] ) + { + return( 1 ); + } + nports -= 2; + portptr += 2; + } + while ( nports-- > 0 ) + { + if ( *portptr++ == port ) + { + return( 1 ); + } + } + return(0); +} + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + + +/* + * Returns 0 if packet should be dropped, 1 if it should be accepted, + * and -1 if an ICMP host unreachable packet should be sent. + * Also does accounting so you can feed it the accounting chain. + * If opt is set to 1, it means that we do this for accounting + * purposes (searches all entries and handles fragments different). + * If opt is set to 2, it doesn't count a matching packet, which + * is used when calling this for checking purposes (IP_FW_CHK_*). + */ + + +int ip_fw_chk(struct iphdr *ip, struct device *rif, struct ip_fw *chain, int policy, int opt) +{ + struct ip_fw *f; + struct tcphdr *tcp=(struct tcphdr *)((unsigned long *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((unsigned long *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port=0, dst_port=0; + unsigned short f_prt=0, prt; + char notcpsyn=1, frag1, match; + unsigned short f_flag; + + /* + * If the chain is empty follow policy. The BSD one + * accepts anything giving you a time window while + * flushing and rebuilding the tables. + */ + + src = ip->saddr; + dst = ip->daddr; + + /* + * This way we handle fragmented packets. + * we ignore all fragments but the first one + * so the whole packet can't be reassembled. + * This way we relay on the full info which + * stored only in first packet. + * + * Note that this theoretically allows partial packet + * spoofing. Not very dangerous but paranoid people may + * wish to play with this. 
It also allows the so called + * "fragment bomb" denial of service attack on some types + * of system. + */ + + frag1 = ((ntohs(ip->frag_off) & IP_OFFSET) == 0); + if (!frag1 && (opt != 1) && (ip->protocol == IPPROTO_TCP || + ip->protocol == IPPROTO_UDP)) + return(1); + + src = ip->saddr; + dst = ip->daddr; + + /* + * If we got interface from which packet came + * we can use the address directly. This is unlike + * 4.4BSD derived systems that have an address chain + * per device. We have a device per address with dummy + * devices instead. + */ + + dprintf1("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf1("TCP "); + /* ports stay 0 if it is not the first fragment */ + if (frag1) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + if(tcp->syn && !tcp->ack) + /* We *DO* have SYN, value FALSE */ + notcpsyn=0; + } + prt=IP_FW_F_TCP; + break; + case IPPROTO_UDP: + dprintf1("UDP "); + /* ports stay 0 if it is not the first fragment */ + if (frag1) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + prt=IP_FW_F_UDP; + break; + case IPPROTO_ICMP: + dprintf2("ICMP:%d ",((char *)portptr)[0]&0xff); + prt=IP_FW_F_ICMP; + break; + default: + dprintf2("p=%d ",ip->protocol); + prt=IP_FW_F_ALL; + break; + } + dprint_ip(ip->saddr); + + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 0 when it is not the first fragment! */ + dprintf2(":%d ", src_port); + dprint_ip(ip->daddr); + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 0 when it is not the first fragment! */ + dprintf2(":%d ",dst_port); + dprintf1("\n"); + + for (f=chain;f;f=f->fw_next) + { + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + /* + * Match can become 0x01 (a "normal" match was found), + * 0x02 (a reverse match was found), and 0x03 (the + * IP addresses match in both directions). 
+ * Now we know in which direction(s) we should look + * for a match for the TCP/UDP ports. Both directions + * might match (e.g., when both addresses are on the + * same network for which an address/mask is given), but + * the ports might only match in one direction. + * This was obviously wrong in the original BSD code. + */ + match = 0x00; + + if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* normal direction */ + match |= 0x01; + + if ((f->fw_flg & IP_FW_F_BIDIR) && + (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* reverse direction */ + match |= 0x02; + + if (match) + { + /* + * Look for a VIA match + */ + if(f->fw_via.s_addr && rif) + { + if(rif->pa_addr!=f->fw_via.s_addr) + continue; /* Mismatch */ + } + /* + * Drop through - this is a match + */ + } + else + continue; + + /* + * Ok the chain addresses match. + */ + + f_prt=f->fw_flg&IP_FW_F_KIND; + if (f_prt!=IP_FW_F_ALL) + { + /* + * This is actually buggy as if you set SYN flag + * on UDP or ICMP firewall it will never work,but + * actually it is a concern of software which sets + * firewall entries. + */ + + if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn) + continue; + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if(prt!=f_prt) + continue; + + if(!(prt==IP_FW_F_ICMP || ((match & 0x01) && + port_match(&f->fw_pts[0], f->fw_nsp, src_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port, + f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) && + port_match(&f->fw_pts[0], f->fw_nsp, dst_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port, + f->fw_flg&IP_FW_F_DRNG)))) + { + continue; + } + } +#ifdef CONFIG_IP_FIREWALL_VERBOSE + /* + * VERY ugly piece of code which actually + * makes kernel printf for denied packets... 
+ */ + + if (f->fw_flg & IP_FW_F_PRN) + { + if(opt != 1) { + if(f->fw_flg&IP_FW_F_ACCEPT) + printk("Accept "); + else if(f->fw_flg&IP_FW_F_ICMPRPL) + printk("Reject "); + else + printk("Deny "); + } + switch(ip->protocol) + { + case IPPROTO_TCP: + printk("TCP "); + break; + case IPPROTO_UDP: + printk("UDP "); + case IPPROTO_ICMP: + printk("ICMP "); + break; + default: + printk("p=%d ",ip->protocol); + break; + } + print_ip(ip->saddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%d", src_port); + printk(" "); + print_ip(ip->daddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%d",dst_port); + printk("\n"); + } +#endif + if (opt != 2) { + f->fw_bcnt+=ntohs(ip->tot_len); + f->fw_pcnt++; + } + if (opt != 1) + break; + } /* Loop */ + + if(opt == 1) + return 0; + + /* + * We rely on policy defined in the rejecting entry or, if no match + * was found, we rely on the general policy variable for this type + * of firewall. + */ + + if(f!=NULL) /* A match was found */ + f_flag=f->fw_flg; + else + f_flag=policy; + if(f_flag&IP_FW_F_ACCEPT) + return 1; + if(f_flag&IP_FW_F_ICMPRPL) + return -1; + return 0; +} + + +static void zero_fw_chain(struct ip_fw *chainptr) +{ + struct ip_fw *ctmp=chainptr; + while(ctmp) + { + ctmp->fw_pcnt=0L; + ctmp->fw_bcnt=0L; + ctmp=ctmp->fw_next; + } +} + +static void free_fw_chain(struct ip_fw *volatile* chainptr) +{ + unsigned long flags; + save_flags(flags); + cli(); + while ( *chainptr != NULL ) + { + struct ip_fw *ftmp; + ftmp = *chainptr; + *chainptr = ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + } + restore_flags(flags); +} + +/* Volatiles to keep some of the compiler versions amused */ + +static int add_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl) +{ + struct ip_fw *ftmp; + struct ip_fw *chtmp=NULL; + struct ip_fw *volatile chtmp_prev=NULL; + unsigned long flags; + unsigned long m_src_mask,m_dst_mask; + unsigned long n_sa,n_da,o_sa,o_da,o_sm,o_dm,n_sm,n_dm; + 
unsigned short n_sr,n_dr,o_sr,o_dr; + unsigned short oldkind,newkind; + int addb4=0; + int n_o,n_n; + + save_flags(flags); + + ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); + if ( ftmp == NULL ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_fw_ctl: malloc said no\n"); +#endif + return( ENOMEM ); + } + + memcpy(ftmp, frwl, sizeof( struct ip_fw ) ); + + ftmp->fw_pcnt=0L; + ftmp->fw_bcnt=0L; + + ftmp->fw_next = NULL; + + cli(); + + if (*chainptr==NULL) + { + *chainptr=ftmp; + } + else + { + chtmp_prev=NULL; + for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next) + { + addb4=0; + newkind=ftmp->fw_flg & IP_FW_F_KIND; + oldkind=chtmp->fw_flg & IP_FW_F_KIND; + + if (newkind!=IP_FW_F_ALL + && oldkind!=IP_FW_F_ALL + && oldkind!=newkind) + { + chtmp_prev=chtmp; + continue; + } + + /* + * Very very *UGLY* code... + * Sorry,but i had to do this.... + */ + + n_sa=ntohl(ftmp->fw_src.s_addr); + n_da=ntohl(ftmp->fw_dst.s_addr); + n_sm=ntohl(ftmp->fw_smsk.s_addr); + n_dm=ntohl(ftmp->fw_dmsk.s_addr); + + o_sa=ntohl(chtmp->fw_src.s_addr); + o_da=ntohl(chtmp->fw_dst.s_addr); + o_sm=ntohl(chtmp->fw_smsk.s_addr); + o_dm=ntohl(chtmp->fw_dmsk.s_addr); + + m_src_mask = o_sm & n_sm; + m_dst_mask = o_dm & n_dm; + + if ((o_sa & m_src_mask) == (n_sa & m_src_mask)) + { + if (n_sm > o_sm) + addb4++; + if (n_sm < o_sm) + addb4--; + } + + if ((o_da & m_dst_mask) == (n_da & m_dst_mask)) + { + if (n_dm > o_dm) + addb4++; + if (n_dm < o_dm) + addb4--; + } + + if (((o_da & o_dm) == (n_da & n_dm)) + &&((o_sa & o_sm) == (n_sa & n_sm))) + { + if (newkind!=IP_FW_F_ALL && + oldkind==IP_FW_F_ALL) + addb4++; + if (newkind==oldkind && (oldkind==IP_FW_F_TCP + || oldkind==IP_FW_F_UDP)) + { + + /* + * Here the main idea is to check the size + * of port range which the frwl covers + * We actually don't check their values but + * just the wideness of range they have + * so that less wide ranges or single ports + * go first and wide ranges go later. 
No ports + * at all treated as a range of maximum number + * of ports. + */ + + if (ftmp->fw_flg & IP_FW_F_SRNG) + n_sr=ftmp->fw_pts[1]-ftmp->fw_pts[0]; + else + n_sr=(ftmp->fw_nsp)? + ftmp->fw_nsp : 0xFFFF; + + if (chtmp->fw_flg & IP_FW_F_SRNG) + o_sr=chtmp->fw_pts[1]-chtmp->fw_pts[0]; + else + o_sr=(chtmp->fw_nsp)?chtmp->fw_nsp : 0xFFFF; + + if (n_sr<o_sr) + addb4++; + if (n_sr>o_sr) + addb4--; + + n_n=ftmp->fw_nsp; + n_o=chtmp->fw_nsp; + + /* + * Actually this cannot happen as the frwl control + * procedure checks for number of ports in source and + * destination range but we will try to be more safe. + */ + + if ((n_n>(IP_FW_MAX_PORTS-2)) || + (n_o>(IP_FW_MAX_PORTS-2))) + goto skip_check; + + if (ftmp->fw_flg & IP_FW_F_DRNG) + n_dr=ftmp->fw_pts[n_n+1]-ftmp->fw_pts[n_n]; + else + n_dr=(ftmp->fw_ndp)? ftmp->fw_ndp : 0xFFFF; + + if (chtmp->fw_flg & IP_FW_F_DRNG) + o_dr=chtmp->fw_pts[n_o+1]-chtmp->fw_pts[n_o]; + else + o_dr=(chtmp->fw_ndp)? chtmp->fw_ndp : 0xFFFF; + if (n_dr<o_dr) + addb4++; + if (n_dr>o_dr) + addb4--; +skip_check: + } + /* finally look at the interface address */ + if ((addb4 == 0) && ftmp->fw_via.s_addr && + !(chtmp->fw_via.s_addr)) + addb4++; + } + if (addb4>0) + { + if (chtmp_prev) + { + chtmp_prev->fw_next=ftmp; + ftmp->fw_next=chtmp; + } + else + { + *chainptr=ftmp; + ftmp->fw_next=chtmp; + } + restore_flags(flags); + return 0; + } + chtmp_prev=chtmp; + } + } + + if (chtmp_prev) + chtmp_prev->fw_next=ftmp; + else + *chainptr=ftmp; + restore_flags(flags); + return(0); +} + +static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl) +{ + struct ip_fw *ftmp,*ltmp; + unsigned short tport1,tport2,tmpnum; + char matches,was_found; + unsigned long flags; + + save_flags(flags); + cli(); + + ftmp=*chainptr; + + if ( ftmp == NULL ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: chain is empty\n"); +#endif + restore_flags(flags); + return( EINVAL ); + } + + ltmp=NULL; + was_found=0; + + while( ftmp != NULL ) + { + 
matches=1; + if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr + || ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr + || ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr + || ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr + || ftmp->fw_via.s_addr!=frwl->fw_via.s_addr + || ftmp->fw_flg!=frwl->fw_flg) + matches=0; + + tport1=ftmp->fw_nsp+ftmp->fw_ndp; + tport2=frwl->fw_nsp+frwl->fw_ndp; + if (tport1!=tport2) + matches=0; + else if (tport1!=0) + { + for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++) + if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum]) + matches=0; + } + if(matches) + { + was_found=1; + if (ltmp) + { + ltmp->fw_next=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=ltmp->fw_next; + } + else + { + *chainptr=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=*chainptr; + } + } + else + { + ltmp = ftmp; + ftmp = ftmp->fw_next; + } + } + restore_flags(flags); + if (was_found) + return 0; + else + return(EINVAL); +} + +#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ + +struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len) +{ + + if ( len != sizeof(struct ip_fw) ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: len=%d, want %d\n",m->m_len, + sizeof(struct ip_fw)); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n", + frwl->fw_flg); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: src range set but n_src_p=%d\n", + frwl->fw_nsp); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: dst range set but n_dst_p=%d\n", + frwl->fw_ndp); +#endif + return(NULL); + } + + if ( frwl->fw_nsp + frwl->fw_ndp > IP_FW_MAX_PORTS ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: too many ports (%d+%d)\n", + frwl->fw_nsp,frwl->fw_ndp); +#endif + 
return(NULL); + } + + return frwl; +} + + + + +#ifdef CONFIG_IP_ACCT + +void ip_acct_cnt(struct iphdr *iph, struct device *dev, struct ip_fw *f) +{ + (void) ip_fw_chk(iph, dev, f, 0, 1); + return; +} + +int ip_acct_ctl(int stage, void *m, int len) +{ + if ( stage == IP_ACCT_FLUSH ) + { + free_fw_chain(&ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_ZERO ) + { + zero_fw_chain(ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_ADD + || stage == IP_ACCT_DEL + ) + { + struct ip_fw *frwl; + + if (!(frwl=check_ipfw_struct(m,len))) + return (EINVAL); + + switch (stage) + { + case IP_ACCT_ADD: + return( add_to_chain(&ip_acct_chain,frwl)); + case IP_ACCT_DEL: + return( del_from_chain(&ip_acct_chain,frwl)); + default: + /* + * Should be panic but... (Why ??? - AC) + */ +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_ctl(int stage, void *m, int len) +{ + int ret; + + if ( stage == IP_FW_FLUSH_BLK ) + { + free_fw_chain(&ip_fw_blk_chain); + return(0); + } + + if ( stage == IP_FW_FLUSH_FWD ) + { + free_fw_chain(&ip_fw_fwd_chain); + return(0); + } + + if ( stage == IP_FW_ZERO_BLK ) + { + zero_fw_chain(ip_fw_blk_chain); + return(0); + } + + if ( stage == IP_FW_ZERO_FWD ) + { + zero_fw_chain(ip_fw_fwd_chain); + return(0); + } + + if ( stage == IP_FW_POLICY_BLK || stage == IP_FW_POLICY_FWD ) + { + int *tmp_policy_ptr; + tmp_policy_ptr=(int *)m; + if ( stage == IP_FW_POLICY_BLK ) + ip_fw_blk_policy=*tmp_policy_ptr; + else + ip_fw_fwd_policy=*tmp_policy_ptr; + return 0; + } + + if ( stage == IP_FW_CHK_BLK || stage == IP_FW_CHK_FWD ) + { + struct device viadev; + struct ip_fwpkt *ipfwp; + struct iphdr *ip; + + if ( len < sizeof(struct ip_fwpkt) ) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_fw_ctl: length=%d, expected %d\n", + len, 
sizeof(struct ip_fwpkt)); +#endif + return( EINVAL ); + } + + ipfwp = (struct ip_fwpkt *)m; + ip = &(ipfwp->fwp_iph); + + if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) + { +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, + sizeof(struct ip)/sizeof(int)); +#endif + return(EINVAL); + } + + viadev.pa_addr = ipfwp->fwp_via.s_addr; + + if ((ret = ip_fw_chk(ip, &viadev, + stage == IP_FW_CHK_BLK ? + ip_fw_blk_chain : ip_fw_fwd_chain, + stage == IP_FW_CHK_BLK ? + ip_fw_blk_policy : ip_fw_fwd_policy, 2 )) > 0 + ) + return(0); + else if (ret == -1) + return(ECONNREFUSED); + else + return(ETIMEDOUT); + } + +/* + * Here we really working hard-adding new elements + * to blocking/forwarding chains or deleting 'em + */ + + if ( stage == IP_FW_ADD_BLK || stage == IP_FW_ADD_FWD + || stage == IP_FW_DEL_BLK || stage == IP_FW_DEL_FWD + ) + { + struct ip_fw *frwl; + frwl=check_ipfw_struct(m,len); + if (frwl==NULL) + return (EINVAL); + + switch (stage) + { + case IP_FW_ADD_BLK: + return(add_to_chain(&ip_fw_blk_chain,frwl)); + case IP_FW_ADD_FWD: + return(add_to_chain(&ip_fw_fwd_chain,frwl)); + case IP_FW_DEL_BLK: + return(del_from_chain(&ip_fw_blk_chain,frwl)); + case IP_FW_DEL_FWD: + return(del_from_chain(&ip_fw_fwd_chain,frwl)); + default: + /* + * Should be panic but... (Why are BSD people panic obsessed ??) 
+ */ +#ifdef DEBUG_CONFIG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } + +#ifdef DEBUG_CONFIG_IP_FIREWALL + printf("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif /* CONFIG_IP_FIREWALL */ + +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + +static int ip_chain_procinfo(int stage, char *buffer, char **start, + off_t offset, int length, int reset) +{ + off_t pos=0, begin=0; + struct ip_fw *i; + unsigned long flags; + int len, p; + + + switch(stage) + { +#ifdef CONFIG_IP_FIREWALL + case IP_INFO_BLK: + i = ip_fw_blk_chain; + len=sprintf(buffer, "IP firewall block rules, default %d\n", + ip_fw_blk_policy); + break; + case IP_INFO_FWD: + i = ip_fw_fwd_chain; + len=sprintf(buffer, "IP firewall forward rules, default %d\n", + ip_fw_fwd_policy); + break; +#endif +#ifdef CONFIG_IP_ACCT + case IP_INFO_ACCT: + i = ip_acct_chain; + len=sprintf(buffer,"IP accounting rules\n"); + break; +#endif + default: + /* this should never be reached, but safety first... */ + i = NULL; + len=0; + break; + } + + save_flags(flags); + cli(); + + while(i!=NULL) + { + len+=sprintf(buffer+len,"%08lX/%08lX->%08lX/%08lX %08lX %X ", + ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr), + ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), + ntohl(i->fw_via.s_addr),i->fw_flg); + len+=sprintf(buffer+len,"%u %u %lu %lu", + i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); + for (p = 0; p < IP_FW_MAX_PORTS; p++) + len+=sprintf(buffer+len, " %u", i->fw_pts[p]); + buffer[len++]='\n'; + buffer[len]='\0'; + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + else if(reset) + { + /* This needs to be done at this specific place! 
*/ + i->fw_pcnt=0L; + i->fw_bcnt=0L; + } + if(pos>offset+length) + break; + i=i->fw_next; + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +#ifdef CONFIG_IP_ACCT + +int ip_acct_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_ACCT, buffer,start,offset,length,reset); +} + +#endif + +#ifdef CONFIG_IP_FIREWALL + +int ip_fw_blk_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_BLK, buffer,start,offset,length,reset); +} + +int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset, int length, int reset) +{ + return ip_chain_procinfo(IP_INFO_FWD, buffer,start,offset,length,reset); +} + +#endif diff --git a/pfinet/linux-inet/ipx.c b/pfinet/linux-inet/ipx.c new file mode 100644 index 00000000..88b53c30 --- /dev/null +++ b/pfinet/linux-inet/ipx.c @@ -0,0 +1,1947 @@ +/* + * Implements an IPX socket layer (badly - but I'm working on it). + * + * This code is derived from work by + * Ross Biro : Writing the original IP stack + * Fred Van Kempen : Tidying up the TCP/IP + * + * Many thanks go to Keith Baker, Institute For Industrial Information + * Technology Ltd, Swansea University for allowing me to work on this + * in my own time even though it was in some ways related to commercial + * work I am currently employed to do there. + * + * All the material in this file is subject to the Gnu license version 2. + * Neither Alan Cox nor the Swansea University Computer Society admit liability + * nor provide warranty for any of this software. This material is provided + * as is and at no charge. + * + * Revision 0.21: Uses the new generic socket option code. + * Revision 0.22: Gcc clean ups and drop out device registration. Use the + * new multi-protocol edition of hard_header + * Revision 0.23: IPX /proc by Mark Evans. 
+ * Adding a route will overwrite any existing route to the same + * network. + * Revision 0.24: Supports new /proc with no 4K limit + * Revision 0.25: Add ephemeral sockets, passive local network + * identification, support for local net 0 and + * multiple datalinks <Greg Page> + * Revision 0.26: Device drop kills IPX routes via it. (needed for modules) + * Revision 0.27: Autobind <Mark Evans> + * Revision 0.28: Small fix for multiple local networks <Thomas Winder> + * Revision 0.29: Assorted major errors removed <Mark Evans> + * Small correction to promisc mode error fix <Alan Cox> + * Asynchronous I/O support. + * Changed to use notifiers and the newer packet_type stuff. + * Assorted major fixes <Alejandro Liu> + * + * Portions Copyright (c) 1995 Caldera, Inc. <greg@caldera.com> + * Neither Greg Page nor Caldera, Inc. admit liability nor provide + * warranty for any of this software. This material is provided + * "AS-IS" and at no charge. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/ipx.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "sock.h" +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/interrupt.h> +#include "p8022.h" +#include "psnap.h" + +#ifdef CONFIG_IPX +/* Configuration Variables */ +static unsigned char ipxcfg_max_hops = 16; +static char ipxcfg_auto_select_primary = 0; +static char ipxcfg_auto_create_interfaces = 0; + +/* Global Variables */ +static struct datalink_proto *p8022_datalink = NULL; +static struct datalink_proto *pEII_datalink = NULL; +static struct datalink_proto *p8023_datalink = NULL; +static struct 
datalink_proto *pSNAP_datalink = NULL; + +static ipx_interface *ipx_interfaces = NULL; +static ipx_route *ipx_routes = NULL; +static ipx_interface *ipx_internal_net = NULL; +static ipx_interface *ipx_primary_net = NULL; + +static int +ipxcfg_set_auto_create(char val) +{ + ipxcfg_auto_create_interfaces = val; + return 0; +} + +static int +ipxcfg_set_auto_select(char val) +{ + ipxcfg_auto_select_primary = val; + if (val && (ipx_primary_net == NULL)) + ipx_primary_net = ipx_interfaces; + return 0; +} + +static int +ipxcfg_get_config_data(ipx_config_data *arg) +{ + ipx_config_data vals; + + vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces; + vals.ipxcfg_auto_select_primary = ipxcfg_auto_select_primary; + memcpy_tofs(arg, &vals, sizeof(vals)); + return 0; +} + + +/***********************************************************************************************************************\ +* * +* Handlers for the socket list. * +* * +\***********************************************************************************************************************/ + +/* + * Note: Sockets may not be removed _during_ an interrupt or inet_bh + * handler using this technique. They can be added although we do not + * use this facility. + */ + +static void +ipx_remove_socket(ipx_socket *sk) +{ + ipx_socket *s; + ipx_interface *intrfc; + unsigned long flags; + + save_flags(flags); + cli(); + + /* Determine interface with which socket is associated */ + intrfc = sk->ipx_intrfc; + if (intrfc == NULL) { + restore_flags(flags); + return; + } + + s=intrfc->if_sklist; + if(s==sk) { + intrfc->if_sklist=s->next; + restore_flags(flags); + return; + } + + while(s && s->next) { + if(s->next==sk) { + s->next=sk->next; + restore_flags(flags); + return; + } + s=s->next; + } + restore_flags(flags); +} + +/* + * This is only called from user mode. Thus it protects itself against + * interrupt users but doesn't worry about being called during work. 
+ * Once it is removed from the queue no interrupt or bottom half will + * touch it and we are (fairly 8-) ) safe. + */ + +static void +ipx_destroy_socket(ipx_socket *sk) +{ + struct sk_buff *skb; + + ipx_remove_socket(sk); + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { + kfree_skb(skb,FREE_READ); + } + + kfree_s(sk,sizeof(*sk)); +} + +/* The following code is used to support IPX Interfaces (IPXITF). An + * IPX interface is defined by a physical device and a frame type. + */ + +static ipx_route * ipxrtr_lookup(unsigned long); + +static void +ipxitf_clear_primary_net(void) +{ + if (ipxcfg_auto_select_primary && (ipx_interfaces != NULL)) + ipx_primary_net = ipx_interfaces; + else + ipx_primary_net = NULL; +} + +static ipx_interface * +ipxitf_find_using_phys(struct device *dev, unsigned short datalink) +{ + ipx_interface *i; + + for (i=ipx_interfaces; + i && ((i->if_dev!=dev) || (i->if_dlink_type!=datalink)); + i=i->if_next) + ; + return i; +} + +static ipx_interface * +ipxitf_find_using_net(unsigned long net) +{ + ipx_interface *i; + + if (net == 0L) + return ipx_primary_net; + + for (i=ipx_interfaces; i && (i->if_netnum!=net); i=i->if_next) + ; + + return i; +} + +/* Sockets are bound to a particular IPX interface. 
*/ +static void +ipxitf_insert_socket(ipx_interface *intrfc, ipx_socket *sk) +{ + ipx_socket *s; + + sk->ipx_intrfc = intrfc; + sk->next = NULL; + if (intrfc->if_sklist == NULL) { + intrfc->if_sklist = sk; + } else { + for (s = intrfc->if_sklist; s->next != NULL; s = s->next) + ; + s->next = sk; + } +} + +static ipx_socket * +ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) +{ + ipx_socket *s; + + for (s=intrfc->if_sklist; + (s != NULL) && (s->ipx_port != port); + s=s->next) + ; + + return s; +} + +static void ipxrtr_del_routes(ipx_interface *); + +static void +ipxitf_down(ipx_interface *intrfc) +{ + ipx_interface *i; + ipx_socket *s, *t; + + /* Delete all routes associated with this interface */ + ipxrtr_del_routes(intrfc); + + /* error sockets */ + for (s = intrfc->if_sklist; s != NULL; ) { + s->err = ENOLINK; + s->error_report(s); + s->ipx_intrfc = NULL; + s->ipx_port = 0; + s->zapped=1; /* Indicates it is no longer bound */ + t = s; + s = s->next; + t->next = NULL; + } + intrfc->if_sklist = NULL; + + /* remove this interface from list */ + if (intrfc == ipx_interfaces) { + ipx_interfaces = intrfc->if_next; + } else { + for (i = ipx_interfaces; + (i != NULL) && (i->if_next != intrfc); + i = i->if_next) + ; + if ((i != NULL) && (i->if_next == intrfc)) + i->if_next = intrfc->if_next; + } + + /* remove this interface from *special* networks */ + if (intrfc == ipx_primary_net) + ipxitf_clear_primary_net(); + if (intrfc == ipx_internal_net) + ipx_internal_net = NULL; + + kfree_s(intrfc, sizeof(*intrfc)); +} + +static int +ipxitf_device_event(unsigned long event, void *ptr) +{ + struct device *dev = ptr; + ipx_interface *i, *tmp; + + if(event!=NETDEV_DOWN) + return NOTIFY_DONE; + + for (i = ipx_interfaces; i != NULL; ) { + + tmp = i->if_next; + if (i->if_dev == dev) + ipxitf_down(i); + i = tmp; + + } + + return NOTIFY_DONE; +} + +static int +ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb) +{ + int retval; + + if((retval = 
sock_queue_rcv_skb(sock, skb))<0) { + /* + * We do a FREE_WRITE here because this indicates how + * to treat the socket with which the packet is + * associated. If this packet is associated with a + * socket at all, it must be the originator of the + * packet. Incoming packets will have no socket + * associated with them at this point. + */ + kfree_skb(skb,FREE_WRITE); + } + return retval; +} + +static int +ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int copy) +{ + ipx_packet *ipx = (ipx_packet *)(skb->h.raw); + ipx_socket *sock1 = NULL, *sock2 = NULL; + struct sk_buff *skb1 = NULL, *skb2 = NULL; + int ipx_offset; + + sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock); + + /* + * We need to check if there is a primary net and if + * this is addressed to one of the *SPECIAL* sockets because + * these need to be propagated to the primary net. + * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and + * 0x456(Diagnostic). + */ + if (ipx_primary_net && (intrfc != ipx_primary_net)) { + switch (ntohs(ipx->ipx_dest.sock)) { + case 0x452: + case 0x453: + case 0x456: + /* + * The appropriate thing to do here is to + * dup the packet and route to the primary net + * interface via ipxitf_send; however, we'll cheat + * and just demux it here. + */ + sock2 = ipxitf_find_socket(ipx_primary_net, + ipx->ipx_dest.sock); + break; + default: + break; + } + } + + /* if there is nothing to do, return */ + if ((sock1 == NULL) && (sock2 == NULL)) { + if (!copy) + kfree_skb(skb,FREE_WRITE); + return 0; + } + + ipx_offset = (char *)(skb->h.raw) - (char *)(skb->data); + + /* This next segment of code is a little awkward, but it sets it up + * so that the appropriate number of copies of the SKB are made and + * that skb1 and skb2 point to it (them) so that it (they) can be + * demuxed to sock1 and/or sock2. If we are unable to make enough + * copies, we do as much as is possible. 
+ */ + if (copy) { + skb1 = skb_clone(skb, GFP_ATOMIC); + if (skb1 != NULL) { + skb1->h.raw = (unsigned char *)&(skb1->data[ipx_offset]); + skb1->arp = skb1->free = 1; + } + } else { + skb1 = skb; + } + + if (skb1 == NULL) return -ENOMEM; + + /* Do we need 2 SKBs? */ + if (sock1 && sock2) { + skb2 = skb_clone(skb1, GFP_ATOMIC); + if (skb2 != NULL) { + skb2->h.raw = (unsigned char *)&(skb2->data[ipx_offset]); + skb2->arp = skb2->free = 1; + } + } else { + skb2 = skb1; + } + + if (sock1) { + (void) ipxitf_def_skb_handler(sock1, skb1); + } + + if (skb2 == NULL) return -ENOMEM; + + if (sock2) { + (void) ipxitf_def_skb_handler(sock2, skb2); + } + + return 0; +} + +static struct sk_buff * +ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buff *skb) +{ + struct sk_buff *skb2; + int in_offset = skb->h.raw - skb->data; + int out_offset = intrfc->if_ipx_offset; + char *oldraw; + int len; + + /* Hopefully, most cases */ + if (in_offset == out_offset) { + skb->len += out_offset; + skb->arp = skb->free = 1; + return skb; + } + + /* Existing SKB will work, just need to move things around a little */ + if (in_offset > out_offset) { + oldraw = skb->h.raw; + skb->h.raw = &(skb->data[out_offset]); + memmove(skb->h.raw, oldraw, skb->len); + skb->len += out_offset; + skb->arp = skb->free = 1; + return skb; + } + + /* Need new SKB */ + len = skb->len + out_offset; + skb2 = alloc_skb(len, GFP_ATOMIC); + if (skb2 != NULL) { + skb2->h.raw = &(skb2->data[out_offset]); + skb2->len = len; + skb2->free=1; + skb2->arp=1; + memcpy(skb2->h.raw, skb->h.raw, skb->len); + } + kfree_skb(skb, FREE_WRITE); + return skb2; +} + +static int +ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node) +{ + ipx_packet *ipx = (ipx_packet *)(skb->h.raw); + struct device *dev = intrfc->if_dev; + struct datalink_proto *dl = intrfc->if_dlink; + char dest_node[IPX_NODE_LEN]; + int send_to_wire = 1; + int addr_len; + + /* We need to know how many skbuffs it will take to send out this + * packet to 
avoid unnecessary copies. + */ + if ((dl == NULL) || (dev == NULL) || (dev->flags & IFF_LOOPBACK)) + send_to_wire = 0; + + /* See if this should be demuxed to sockets on this interface */ + if (ipx->ipx_dest.net == intrfc->if_netnum) { + if (memcmp(intrfc->if_node, node, IPX_NODE_LEN) == 0) + return ipxitf_demux_socket(intrfc, skb, 0); + if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) { + ipxitf_demux_socket(intrfc, skb, send_to_wire); + if (!send_to_wire) return 0; + } + } + + /* if the originating net is not equal to our net; this is routed */ + if (ipx->ipx_source.net != intrfc->if_netnum) { + if (++(ipx->ipx_tctrl) > ipxcfg_max_hops) + send_to_wire = 0; + } + + if (!send_to_wire) { + /* + * We do a FREE_WRITE here because this indicates how + * to treat the socket with which the packet is + * associated. If this packet is associated with a + * socket at all, it must be the originator of the + * packet. Routed packets will have no socket associated + * with them. + */ + kfree_skb(skb,FREE_WRITE); + return 0; + } + + /* determine the appropriate hardware address */ + addr_len = dev->addr_len; + if (memcmp(ipx_broadcast_node, node, IPX_NODE_LEN) == 0) { + memcpy(dest_node, dev->broadcast, addr_len); + } else { + memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len); + } + + /* make any compensation for differing physical/data link size */ + skb = ipxitf_adjust_skbuff(intrfc, skb); + if (skb == NULL) return 0; + + /* set up data link and physical headers */ + skb->dev = dev; + dl->datalink_header(dl, skb, dest_node); + + if (skb->sk != NULL) { + /* This is an outbound packet from this host. We need to + * increment the write count. 
+ */ + skb->sk->wmem_alloc += skb->mem_len; + } + + /* Send it out */ + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + return 0; +} + +static int +ipxrtr_add_route(unsigned long, ipx_interface *, unsigned char *); + +static int +ipxitf_add_local_route(ipx_interface *intrfc) +{ + return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL); +} + +static char * ipx_frame_name(unsigned short); +static char * ipx_device_name(ipx_interface *); +static int ipxrtr_route_skb(struct sk_buff *); + +static int +ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) +{ + ipx_packet *ipx = (ipx_packet *) (skb->h.raw); + ipx_interface *i; + + /* See if we should update our network number */ + if ((intrfc->if_netnum == 0L) && + (ipx->ipx_source.net == ipx->ipx_dest.net) && + (ipx->ipx_source.net != 0L)) { + /* NB: NetWare servers lie about their hop count so we + * dropped the test based on it. This is the best way + * to determine this is a 0 hop count packet. + */ + if ((i=ipxitf_find_using_net(ipx->ipx_source.net))==NULL) { + intrfc->if_netnum = ipx->ipx_source.net; + (void) ipxitf_add_local_route(intrfc); + } else { + printk("IPX: Network number collision %lx\n\t%s %s and %s %s\n", + htonl(ipx->ipx_source.net), + ipx_device_name(i), + ipx_frame_name(i->if_dlink_type), + ipx_device_name(intrfc), + ipx_frame_name(intrfc->if_dlink_type)); + } + } + + if (ipx->ipx_dest.net == 0L) + ipx->ipx_dest.net = intrfc->if_netnum; + if (ipx->ipx_source.net == 0L) + ipx->ipx_source.net = intrfc->if_netnum; + + if (intrfc->if_netnum != ipx->ipx_dest.net) { + /* We only route point-to-point packets. 
*/ + if ((skb->pkt_type != PACKET_BROADCAST) && + (skb->pkt_type != PACKET_MULTICAST)) + return ipxrtr_route_skb(skb); + + kfree_skb(skb,FREE_READ); + return 0; + } + + /* see if we should keep it */ + if ((memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0) + || (memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0)) { + return ipxitf_demux_socket(intrfc, skb, 0); + } + + /* we couldn't pawn it off so unload it */ + kfree_skb(skb,FREE_READ); + return 0; +} + +static void +ipxitf_insert(ipx_interface *intrfc) +{ + ipx_interface *i; + + intrfc->if_next = NULL; + if (ipx_interfaces == NULL) { + ipx_interfaces = intrfc; + } else { + for (i = ipx_interfaces; i->if_next != NULL; i = i->if_next) + ; + i->if_next = intrfc; + } + + if (ipxcfg_auto_select_primary && (ipx_primary_net == NULL)) + ipx_primary_net = intrfc; +} + +static int +ipxitf_create_internal(ipx_interface_definition *idef) +{ + ipx_interface *intrfc; + + /* Only one primary network allowed */ + if (ipx_primary_net != NULL) return -EEXIST; + + /* Must have a valid network number */ + if (idef->ipx_network == 0L) return -EADDRNOTAVAIL; + if (ipxitf_find_using_net(idef->ipx_network) != NULL) + return -EADDRINUSE; + + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc==NULL) + return -EAGAIN; + intrfc->if_dev=NULL; + intrfc->if_netnum=idef->ipx_network; + intrfc->if_dlink_type = 0; + intrfc->if_dlink = NULL; + intrfc->if_sklist = NULL; + intrfc->if_internal = 1; + intrfc->if_ipx_offset = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN); + ipx_internal_net = intrfc; + ipx_primary_net = intrfc; + ipxitf_insert(intrfc); + return ipxitf_add_local_route(intrfc); +} + +static int +ipx_map_frame_type(unsigned char type) +{ + switch (type) { + case IPX_FRAME_ETHERII: return htons(ETH_P_IPX); + case IPX_FRAME_8022: return htons(ETH_P_802_2); + case IPX_FRAME_SNAP: return htons(ETH_P_SNAP); + case 
IPX_FRAME_8023: return htons(ETH_P_802_3); + } + return 0; +} + +static int +ipxitf_create(ipx_interface_definition *idef) +{ + struct device *dev; + unsigned short dlink_type = 0; + struct datalink_proto *datalink = NULL; + ipx_interface *intrfc; + + if (idef->ipx_special == IPX_INTERNAL) + return ipxitf_create_internal(idef); + + if ((idef->ipx_special == IPX_PRIMARY) && (ipx_primary_net != NULL)) + return -EEXIST; + + if ((idef->ipx_network != 0L) && + (ipxitf_find_using_net(idef->ipx_network) != NULL)) + return -EADDRINUSE; + + switch (idef->ipx_dlink_type) { + case IPX_FRAME_ETHERII: + dlink_type = htons(ETH_P_IPX); + datalink = pEII_datalink; + break; + case IPX_FRAME_8022: + dlink_type = htons(ETH_P_802_2); + datalink = p8022_datalink; + break; + case IPX_FRAME_SNAP: + dlink_type = htons(ETH_P_SNAP); + datalink = pSNAP_datalink; + break; + case IPX_FRAME_8023: + dlink_type = htons(ETH_P_802_3); + datalink = p8023_datalink; + break; + case IPX_FRAME_NONE: + default: + break; + } + + if (datalink == NULL) + return -EPROTONOSUPPORT; + + dev=dev_get(idef->ipx_device); + if (dev==NULL) + return -ENODEV; + + if (!(dev->flags & IFF_UP)) + return -ENETDOWN; + + /* Check addresses are suitable */ + if(dev->addr_len>IPX_NODE_LEN) + return -EINVAL; + + if ((intrfc = ipxitf_find_using_phys(dev, dlink_type)) == NULL) { + + /* Ok now create */ + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc==NULL) + return -EAGAIN; + intrfc->if_dev=dev; + intrfc->if_netnum=idef->ipx_network; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + /* Setup primary if necessary */ + if ((idef->ipx_special == IPX_PRIMARY)) + ipx_primary_net = intrfc; + intrfc->if_internal = 0; + intrfc->if_ipx_offset = dev->hard_header_len + datalink->header_length; + memset(intrfc->if_node, 0, IPX_NODE_LEN); + memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), 
dev->dev_addr, dev->addr_len); + + ipxitf_insert(intrfc); + } + + /* If the network number is known, add a route */ + if (intrfc->if_netnum == 0L) + return 0; + + return ipxitf_add_local_route(intrfc); +} + +static int +ipxitf_delete(ipx_interface_definition *idef) +{ + struct device *dev = NULL; + unsigned short dlink_type = 0; + ipx_interface *intrfc; + + if (idef->ipx_special == IPX_INTERNAL) { + if (ipx_internal_net != NULL) { + ipxitf_down(ipx_internal_net); + return 0; + } + return -ENOENT; + } + + dlink_type = ipx_map_frame_type(idef->ipx_dlink_type); + if (dlink_type == 0) + return -EPROTONOSUPPORT; + + dev=dev_get(idef->ipx_device); + if(dev==NULL) return -ENODEV; + + intrfc = ipxitf_find_using_phys(dev, dlink_type); + if (intrfc != NULL) { + ipxitf_down(intrfc); + return 0; + } + return -EINVAL; +} + +static ipx_interface * +ipxitf_auto_create(struct device *dev, unsigned short dlink_type) +{ + struct datalink_proto *datalink = NULL; + ipx_interface *intrfc; + + switch (htons(dlink_type)) { + case ETH_P_IPX: datalink = pEII_datalink; break; + case ETH_P_802_2: datalink = p8022_datalink; break; + case ETH_P_SNAP: datalink = pSNAP_datalink; break; + case ETH_P_802_3: datalink = p8023_datalink; break; + default: return NULL; + } + + if (dev == NULL) + return NULL; + + /* Check addresses are suitable */ + if(dev->addr_len>IPX_NODE_LEN) return NULL; + + intrfc=(ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + if (intrfc!=NULL) { + intrfc->if_dev=dev; + intrfc->if_netnum=0L; + intrfc->if_dlink_type = dlink_type; + intrfc->if_dlink = datalink; + intrfc->if_sklist = NULL; + intrfc->if_internal = 0; + intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; + intrfc->if_ipx_offset = dev->hard_header_len + + datalink->header_length; + memset(intrfc->if_node, 0, IPX_NODE_LEN); + memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), + dev->dev_addr, dev->addr_len); + ipxitf_insert(intrfc); + } + + return intrfc; +} + +static int +ipxitf_ioctl(unsigned int 
cmd, void *arg) +{ /* Interface ioctls. SIOCSIFADDR copies an ifreq from user space and creates or deletes (sipx_action == IPX_DLTITF) the interface it describes; SIOCGIFADDR fills in the network and node of the interface matching the named device and frame type; SIOCAIPXITFCRT and SIOCAIPXPRISLT pass one user byte to the auto-create / auto-select setters. Any other command is -EINVAL. */ + int err; + switch(cmd) + { + case SIOCSIFADDR: + { + struct ifreq ifr; + struct sockaddr_ipx *sipx; + ipx_interface_definition f; + err=verify_area(VERIFY_READ,arg,sizeof(ifr)); + if(err) + return err; + memcpy_fromfs(&ifr,arg,sizeof(ifr)); + sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; + if(sipx->sipx_family!=AF_IPX) + return -EINVAL; + f.ipx_network=sipx->sipx_network; + memcpy(f.ipx_device, ifr.ifr_name, sizeof(f.ipx_device)); + memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN); + f.ipx_dlink_type=sipx->sipx_type; + f.ipx_special=sipx->sipx_special; + if(sipx->sipx_action==IPX_DLTITF) + return ipxitf_delete(&f); + else + return ipxitf_create(&f); + } + case SIOCGIFADDR: + { + struct ifreq ifr; + struct sockaddr_ipx *sipx; + ipx_interface *ipxif; + struct device *dev; + /* VERIFY_WRITE: the same ifreq is read here and written back below */ err=verify_area(VERIFY_WRITE,arg,sizeof(ifr)); + if(err) + return err; + memcpy_fromfs(&ifr,arg,sizeof(ifr)); + sipx=(struct sockaddr_ipx *)&ifr.ifr_addr; + dev=dev_get(ifr.ifr_name); + if(!dev) + return -ENODEV; + ipxif=ipxitf_find_using_phys(dev, ipx_map_frame_type(sipx->sipx_type)); + if(ipxif==NULL) + return -EADDRNOTAVAIL; + sipx->sipx_network=ipxif->if_netnum; + memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node)); + memcpy_tofs(arg,&ifr,sizeof(ifr)); + return 0; + } + case SIOCAIPXITFCRT: + err=verify_area(VERIFY_READ,arg,sizeof(char)); + if(err) + return err; + return ipxcfg_set_auto_create(get_fs_byte(arg)); + case SIOCAIPXPRISLT: + err=verify_area(VERIFY_READ,arg,sizeof(char)); + if(err) + return err; + return ipxcfg_set_auto_select(get_fs_byte(arg)); + default: + return -EINVAL; + } +} + +/*******************************************************************************************************************\ +* * +* Routing tables for the IPX socket layer * +* * +\*******************************************************************************************************************/ + /* Return the route entry whose ir_net equals net, or NULL when the list holds none. */ +static ipx_route * +ipxrtr_lookup(unsigned long net) +{ + ipx_route *r; + + for 
(r=ipx_routes; (r!=NULL) && (r->ir_net!=net); r=r->ir_next) + ; + + return r; +} + /* Point the route for network at intrfc, allocating and linking a new entry at the head of ipx_routes when none exists. A NULL node records a directly connected network (ir_routed = 0); otherwise node is stored as the router address. Returns 0 or -EAGAIN. */ +static int +ipxrtr_add_route(unsigned long network, ipx_interface *intrfc, unsigned char *node) +{ + ipx_route *rt; + + /* Get a route structure; either existing or create */ + rt = ipxrtr_lookup(network); + if (rt==NULL) { + rt=(ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); + if(rt==NULL) + return -EAGAIN; + rt->ir_next=ipx_routes; + ipx_routes=rt; + } + + rt->ir_net = network; + rt->ir_intrfc = intrfc; + if (node == NULL) { + memset(rt->ir_router_node, '\0', IPX_NODE_LEN); + rt->ir_routed = 0; + } else { + memcpy(rt->ir_router_node, node, IPX_NODE_LEN); + rt->ir_routed=1; + } + return 0; +} + /* Unlink and free every route that goes through intrfc. */ +static void +ipxrtr_del_routes(ipx_interface *intrfc) +{ + ipx_route **r, *tmp; + /* r always addresses the link that points at tmp, so unlinking needs no "previous" pointer */ + for (r = &ipx_routes; (tmp = *r) != NULL; ) { + if (tmp->ir_intrfc == intrfc) { + *r = tmp->ir_next; + kfree_s(tmp, sizeof(ipx_route)); + } else { + r = &(tmp->ir_next); + } + } +} + /* Add a routed entry for rd->ipx_network through the interface attached to rd->ipx_router_network; -ENETUNREACH when no such interface exists. */ +static int +ipxrtr_create(ipx_route_definition *rd) +{ + ipx_interface *intrfc; + + /* Find the appropriate interface */ + intrfc = ipxitf_find_using_net(rd->ipx_router_network); + if (intrfc == NULL) + return -ENETUNREACH; + + return ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node); +} + /* Remove the route for net. Returns 0 on success, -EPERM for a directly connected route, -ENOENT when no entry matches. */ + +static int +ipxrtr_delete(long net) +{ + ipx_route **r; + ipx_route *tmp; + + for (r = &ipx_routes; (tmp = *r) != NULL; ) { + if (tmp->ir_net == net) { + if (!(tmp->ir_routed)) { + /* Directly connected; can't lose route */ + return -EPERM; + } + *r = tmp->ir_next; + kfree_s(tmp, sizeof(ipx_route)); + return 0; + } + r = &(tmp->ir_next); + } + + return -ENOENT; +} + /* Build an IPX datagram from len bytes of user data at ubuf and send it towards usipx on behalf of sk. */ +static int +ipxrtr_route_packet(ipx_socket *sk, struct sockaddr_ipx *usipx, void *ubuf, int len) +{ + struct sk_buff *skb; + ipx_interface *intrfc; + ipx_packet *ipx; + int size; + int ipx_offset; + ipx_route *rt = NULL; + + /* Find the appropriate interface on which to send packet */ + if ((usipx->sipx_network == 0L) && (ipx_primary_net != NULL)) { + 
usipx->sipx_network = ipx_primary_net->if_netnum; + intrfc = ipx_primary_net; + } else { + rt = ipxrtr_lookup(usipx->sipx_network); + if (rt==NULL) { + return -ENETUNREACH; + } + intrfc = rt->ir_intrfc; + } + /* Total buffer = link-level headers (if_ipx_offset) + IPX header + payload. NOTE(review): len is not range-checked in this function - confirm callers reject negative lengths. */ + ipx_offset = intrfc->if_ipx_offset; + size=sizeof(ipx_packet)+len; + size += ipx_offset; + + if(size+sk->wmem_alloc>sk->sndbuf) return -EAGAIN; + + skb=alloc_skb(size,GFP_KERNEL); + if(skb==NULL) return -ENOMEM; + + skb->sk=sk; + skb->len=size; + skb->free=1; + skb->arp=1; + + /* Fill in IPX header */ + ipx=(ipx_packet *)&(skb->data[ipx_offset]); + ipx->ipx_checksum=0xFFFF; + ipx->ipx_pktsize=htons(len+sizeof(ipx_packet)); + ipx->ipx_tctrl=0; + ipx->ipx_type=usipx->sipx_type; + skb->h.raw = (unsigned char *)ipx; + /* NOTE(review): assumes sk->ipx_intrfc was set when the socket was bound - confirm */ + ipx->ipx_source.net = sk->ipx_intrfc->if_netnum; + memcpy(ipx->ipx_source.node, sk->ipx_intrfc->if_node, IPX_NODE_LEN); + ipx->ipx_source.sock = sk->ipx_port; + ipx->ipx_dest.net=usipx->sipx_network; + memcpy(ipx->ipx_dest.node,usipx->sipx_node,IPX_NODE_LEN); + ipx->ipx_dest.sock=usipx->sipx_port; + /* Payload follows the header; a routed destination is sent to the router node, anything else straight to the destination node */ + memcpy_fromfs((char *)(ipx+1),ubuf,len); + return ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? + rt->ir_router_node : ipx->ipx_dest.node); +} + /* Forward an already-built packet: look up the destination network, free the skb when no route is known, otherwise hand it to the route's interface. Always returns 0. */ +static int +ipxrtr_route_skb(struct sk_buff *skb) +{ + ipx_packet *ipx = (ipx_packet *) (skb->h.raw); + ipx_route *r; + ipx_interface *i; + + r = ipxrtr_lookup(ipx->ipx_dest.net); + if (r == NULL) { + /* no known route */ + kfree_skb(skb,FREE_READ); + return 0; + } + i = r->ir_intrfc; + (void)ipxitf_send(i, skb, (r->ir_routed) ? 
+ r->ir_router_node : ipx->ipx_dest.node); + return 0; +} + +/* + * We use a normal struct rtentry for route handling + */ + /* SIOCADDRT / SIOCDELRT handler: copies a struct rtentry from user space and accepts it only when RTF_GATEWAY is set and both destination and gateway are AF_IPX. */ +static int ipxrtr_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct rtentry rt; /* Use these to behave like 'other' stacks */ + struct sockaddr_ipx *sg,*st; + + err=verify_area(VERIFY_READ,arg,sizeof(rt)); + if(err) + return err; + + memcpy_fromfs(&rt,arg,sizeof(rt)); + + sg=(struct sockaddr_ipx *)&rt.rt_gateway; + st=(struct sockaddr_ipx *)&rt.rt_dst; + + if(!(rt.rt_flags&RTF_GATEWAY)) + return -EINVAL; /* Direct routes are fixed */ + if(sg->sipx_family!=AF_IPX) + return -EINVAL; + if(st->sipx_family!=AF_IPX) + return -EINVAL; + + switch(cmd) + { + case SIOCDELRT: + return ipxrtr_delete(st->sipx_network); + case SIOCADDRT: + { + struct ipx_route_definition f; + f.ipx_network=st->sipx_network; + f.ipx_router_network=sg->sipx_network; + memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN); + return ipxrtr_create(&f); + } + default: + return -EINVAL; + } +} + /* Printable name for a frame type; the value is passed through ntohs() before being matched against the ETH_P_* constants. */ +static char * +ipx_frame_name(unsigned short frame) +{ + switch (ntohs(frame)) { + case ETH_P_IPX: return "EtherII"; + case ETH_P_802_2: return "802.2"; + case ETH_P_SNAP: return "SNAP"; + case ETH_P_802_3: return "802.3"; + default: return "None"; + } +} + /* Device name to print for an interface: "Internal" for the internal network, the device's name when one is attached, "Unknown" otherwise. */ +static char * +ipx_device_name(ipx_interface *intrfc) +{ + return (intrfc->if_internal ? "Internal" : + (intrfc->if_dev ? intrfc->if_dev->name : "Unknown")); +} + +/* Called from proc fs */ +int +ipx_get_interface_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_interface *i; + int len=0; + off_t pos=0; + off_t begin=0; + + /* Theory.. 
Keep printing in the same place until we pass offset */ + + len += sprintf (buffer,"%-11s%-15s%-9s%-11s%s\n", "Network", + "Node_Address", "Primary", "Device", "Frame_Type"); + for (i = ipx_interfaces; i != NULL; i = i->if_next) { + len += sprintf(buffer+len, "%08lX ", ntohl(i->if_netnum)); + len += sprintf (buffer+len,"%02X%02X%02X%02X%02X%02X ", + i->if_node[0], i->if_node[1], i->if_node[2], + i->if_node[3], i->if_node[4], i->if_node[5]); + len += sprintf(buffer+len, "%-9s", (i == ipx_primary_net) ? + "Yes" : "No"); + len += sprintf (buffer+len, "%-11s", ipx_device_name(i)); + len += sprintf (buffer+len, "%s\n", + ipx_frame_name(i->if_dlink_type)); + + /* Are we still dumping unwanted data then discard the record */ + pos=begin+len; + + if(pos<offset) { + len=0; /* Keep dumping into the buffer start */ + begin=pos; + } + if(pos>offset+length) /* We have dumped enough */ + break; + } + + /* The data in question runs from begin to begin+len */ + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Remove unwanted header data from length */ + if(len>length) + len=length; /* Remove unwanted tail data from length */ + + return len; +} + /* Format a header line plus one line per socket on every interface into buffer. Records that end before offset are overwritten in place (len reset to 0, begin advanced); *start is set to the first wanted byte and the return value is the number of wanted bytes, capped at length. */ +int +ipx_get_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_socket *s; + ipx_interface *i; + int len=0; + off_t pos=0; + off_t begin=0; + + /* Theory.. 
Keep printing in the same place until we pass offset */ + + len += sprintf (buffer,"%-15s%-28s%-10s%-10s%-7s%s\n", "Local_Address", + "Remote_Address", "Tx_Queue", "Rx_Queue", + "State", "Uid"); + for (i = ipx_interfaces; i != NULL; i = i->if_next) { + for (s = i->if_sklist; s != NULL; s = s->next) { + len += sprintf (buffer+len,"%08lX:%04X ", + htonl(i->if_netnum), + htons(s->ipx_port)); + if (s->state!=TCP_ESTABLISHED) { + len += sprintf(buffer+len, "%-28s", "Not_Connected"); + } else { + len += sprintf (buffer+len, + "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", + htonl(s->ipx_dest_addr.net), + s->ipx_dest_addr.node[0], s->ipx_dest_addr.node[1], + s->ipx_dest_addr.node[2], s->ipx_dest_addr.node[3], + s->ipx_dest_addr.node[4], s->ipx_dest_addr.node[5], + htons(s->ipx_dest_addr.sock)); + } + len += sprintf (buffer+len,"%08lX %08lX ", + s->wmem_alloc, s->rmem_alloc); + len += sprintf (buffer+len,"%02X %03d\n", + s->state, SOCK_INODE(s->socket)->i_uid); + + /* Are we still dumping unwanted data then discard the record */ + pos=begin+len; + + if(pos<offset) + { + len=0; /* Keep dumping into the buffer start */ + begin=pos; + } + /* Leave BOTH loops: a plain break only ended the inner socket loop, so every remaining interface still appended one more record */ + if(pos>offset+length) /* We have dumped enough */ + goto done; + } + } + +done: + /* The data in question runs from begin to begin+len */ + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Remove unwanted header data from length */ + if(len>length) + len=length; /* Remove unwanted tail data from length */ + + return len; +} + /* Same windowed dump as above for the routing table: one line per route, showing the router network and node for routed entries and "Directly Connected" otherwise. */ +int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length) +{ + ipx_route *rt; + int len=0; + off_t pos=0; + off_t begin=0; + + len += sprintf (buffer,"%-11s%-13s%s\n", + "Network", "Router_Net", "Router_Node"); + for (rt = ipx_routes; rt != NULL; rt = rt->ir_next) + { + len += sprintf (buffer+len,"%08lX ", ntohl(rt->ir_net)); + if (rt->ir_routed) { + len += sprintf (buffer+len,"%08lX %02X%02X%02X%02X%02X%02X\n", + ntohl(rt->ir_intrfc->if_netnum), + rt->ir_router_node[0], 
rt->ir_router_node[1], + rt->ir_router_node[2], rt->ir_router_node[3], + rt->ir_router_node[4], rt->ir_router_node[5]); + } else { + len += sprintf (buffer+len, "%-13s%s\n", + "Directly", "Connected"); + } + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/*******************************************************************************************************************\ +* * +* Handling for system calls applied via the various interfaces to an IPX socket object * +* * +\*******************************************************************************************************************/ + /* No fcntl commands are supported: every cmd returns -EINVAL. */ +static int ipx_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + switch(cmd) + { + default: + return(-EINVAL); + } +} + /* Set a socket option. The value is read from user space as one int. SOL_IPX/IPX_TYPE stores it as the socket's packet type; SOL_SOCKET is delegated to sock_setsockopt(); anything else is -EOPNOTSUPP. */ +static int ipx_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) +{ + ipx_socket *sk; + int err,opt; + + sk=(ipx_socket *)sock->data; + + if(optval==NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ,optval,sizeof(int)); + if(err) + return err; + opt=get_fs_long((unsigned long *)optval); + + switch(level) + { + case SOL_IPX: + switch(optname) + { + case IPX_TYPE: + sk->ipx_type=opt; + return 0; + default: + return -EOPNOTSUPP; + } + break; + + case SOL_SOCKET: + return sock_setsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } +} + /* Read a socket option. SOL_IPX/IPX_TYPE returns the socket's packet type as an int and stores sizeof(int) in *optlen; SOL_SOCKET is delegated to sock_getsockopt(). Returns 0 or a negative errno; both user pointers are verified before being written. */ +static int ipx_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + ipx_socket *sk; + int val=0; + int err; + + sk=(ipx_socket *)sock->data; + + switch(level) + { + + case SOL_IPX: + switch(optname) + { + case IPX_TYPE: + val=sk->ipx_type; + break; + default: + return -ENOPROTOOPT; + } + break; + + case SOL_SOCKET: + return sock_getsockopt(sk,level,optname,optval,optlen); + + default: + return -EOPNOTSUPP; + } + 
err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *)optlen); + err=verify_area(VERIFY_WRITE,optval,sizeof(int)); + /* The result of this second check used to be ignored, so a bad optval was written to anyway */ + if(err) + return err; + put_fs_long(val,(unsigned long *)optval); + return(0); +} + /* listen() is not supported on IPX sockets. */ +static int ipx_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + /* Default state_change / write_space / error_report callback: wake sleepers unless the socket is dead. */ +static void def_callback1(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + /* Default data_ready callback: wake sleepers and call sock_wake_async() unless the socket is dead. */ +static void def_callback2(struct sock *sk, int len) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 1); + } +} + /* Allocate and initialise the sock behind a new IPX socket. Only SOCK_DGRAM is accepted (-ESOCKTNOSUPPORT otherwise); the socket starts zapped, i.e. unbound. NOTE(review): sock->type is read before the later sock!=NULL test, so that test cannot protect anything - confirm sock is never NULL. */ +static int +ipx_create(struct socket *sock, int protocol) +{ + ipx_socket *sk; + sk=(ipx_socket *)kmalloc(sizeof(*sk),GFP_KERNEL); + if(sk==NULL) + return(-ENOMEM); + switch(sock->type) + { + case SOCK_DGRAM: + break; + default: + kfree_s((void *)sk,sizeof(*sk)); + return(-ESOCKTNOSUPPORT); + } + sk->dead=0; + sk->next=NULL; + sk->broadcast=0; + sk->rcvbuf=SK_RMEM_MAX; + sk->sndbuf=SK_WMEM_MAX; + sk->wmem_alloc=0; + sk->rmem_alloc=0; + sk->inuse=0; + sk->shutdown=0; + sk->prot=NULL; /* So we use default free mechanisms */ + sk->broadcast=0; + sk->err=0; + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->write_queue); + sk->send_head=NULL; + skb_queue_head_init(&sk->back_log); + sk->state=TCP_CLOSE; + sk->socket=sock; + sk->type=sock->type; + sk->ipx_type=0; /* General user level IPX */ + sk->debug=0; + sk->ipx_intrfc = NULL; + memset(&sk->ipx_dest_addr,'\0',sizeof(sk->ipx_dest_addr)); + sk->ipx_port = 0; + sk->mtu=IPX_MTU; + + if(sock!=NULL) + { + sock->data=(void *)sk; + sk->sleep=sock->wait; + } + + sk->state_change=def_callback1; + sk->data_ready=def_callback2; + sk->write_space=def_callback1; + sk->error_report=def_callback1; + + sk->zapped=1; + return 0; +} + /* Detach and destroy the sock behind a socket; a socket whose data pointer is already NULL is a no-op. */ +static int ipx_release(struct socket *sock, struct socket *peer) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + if(sk==NULL) + return(0); + if(!sk->dead) + sk->state_change(sk); + sk->dead=1; + 
sock->data=NULL; + ipx_destroy_socket(sk); + return(0); +} + /* Duplicate a socket by creating a fresh, unbound one; nothing is copied from oldsock. */ +static int ipx_dup(struct socket *newsock,struct socket *oldsock) +{ + return(ipx_create(newsock,SOCK_DGRAM)); +} + /* Find an unused ephemeral socket number on intrfc, starting from the interface's saved hint (if_sknum) and wrapping inside [IPX_MIN_EPHEMERAL_SOCKET, IPX_MAX_EPHEMERAL_SOCKET]. On success the hint is updated and the number is returned byte-swapped with ntohs(), as before. Returns 0 when every ephemeral number is in use; ipx_bind() already treats 0 as failure. */ +static unsigned short +ipx_first_free_socketnum(ipx_interface *intrfc) +{ + unsigned short socketNum = intrfc->if_sknum; + int tries; + + /* Restart from the bottom whenever the saved hint lies outside the ephemeral range */ + if ((socketNum < IPX_MIN_EPHEMERAL_SOCKET) || + (socketNum > IPX_MAX_EPHEMERAL_SOCKET)) + socketNum = IPX_MIN_EPHEMERAL_SOCKET; + + /* Probe each ephemeral number at most once so that a full table ends the search instead of looping forever */ + for (tries = IPX_MAX_EPHEMERAL_SOCKET - IPX_MIN_EPHEMERAL_SOCKET + 1; + tries > 0; tries--) { + if (ipxitf_find_socket(intrfc, ntohs(socketNum)) == NULL) { + intrfc->if_sknum = socketNum; + return ntohs(socketNum); + } + /* Wrap at the top of the range; the old '>' test let MAX+1 be handed out */ + if (socketNum >= IPX_MAX_EPHEMERAL_SOCKET) + socketNum = IPX_MIN_EPHEMERAL_SOCKET; + else + socketNum++; + } + + return 0; +} + /* Bind an unbound (zapped) socket to the interface for addr->sipx_network; a zero port asks for an ephemeral one, and ports below IPX_MIN_EPHEMERAL_SOCKET require suser(). */ +static int ipx_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) +{ + ipx_socket *sk; + ipx_interface *intrfc; + struct sockaddr_ipx *addr=(struct sockaddr_ipx *)uaddr; + + sk=(ipx_socket *)sock->data; + + if(sk->zapped==0) + return -EIO; + + if(addr_len!=sizeof(struct sockaddr_ipx)) + return -EINVAL; + + intrfc = ipxitf_find_using_net(addr->sipx_network); + if (intrfc == NULL) + return -EADDRNOTAVAIL; + + if (addr->sipx_port == 0) { + addr->sipx_port = ipx_first_free_socketnum(intrfc); + if (addr->sipx_port == 0) + return -EINVAL; + } + + if(ntohs(addr->sipx_port)<IPX_MIN_EPHEMERAL_SOCKET && !suser()) + return -EPERM; /* protect IPX system stuff like routing/sap */ + + /* Source addresses are easy. 
It must be our network:node pair for + an interface routed to IPX with the ipx routing ioctl() */ + + if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) { + if(sk->debug) + printk("IPX: bind failed because port %X in use.\n", + (int)addr->sipx_port); + return -EADDRINUSE; + } + + sk->ipx_port=addr->sipx_port; + ipxitf_insert_socket(intrfc, sk); + sk->zapped=0; + if(sk->debug) + printk("IPX: socket is bound.\n"); + return 0; +} + /* Record a default destination for the socket. An unbound socket (port 0) is first auto-bound to network 0 with an ephemeral port; the destination network must have a route (-ENETUNREACH otherwise). */ +static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *addr; + + sk->state = TCP_CLOSE; + sock->state = SS_UNCONNECTED; + + if(addr_len!=sizeof(*addr)) + return(-EINVAL); + addr=(struct sockaddr_ipx *)uaddr; + + if(sk->ipx_port==0) + /* put the autobinding in */ + { + struct sockaddr_ipx uaddr; + int ret; + + uaddr.sipx_port = 0; + uaddr.sipx_network = 0L; + ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); + if (ret != 0) return (ret); + } + + if(ipxrtr_lookup(addr->sipx_network)==NULL) + return -ENETUNREACH; + sk->ipx_dest_addr.net=addr->sipx_network; + sk->ipx_dest_addr.sock=addr->sipx_port; + memcpy(sk->ipx_dest_addr.node,addr->sipx_node,IPX_NODE_LEN); + sk->ipx_type=addr->sipx_type; + sock->state = SS_CONNECTED; + sk->state=TCP_ESTABLISHED; + return 0; +} + /* socketpair() is not supported on IPX sockets. */ +static int ipx_socketpair(struct socket *sock1, struct socket *sock2) +{ + return(-EOPNOTSUPP); +} + /* accept() is not supported. Any sock already attached to newsock is freed, and the pointer is cleared so that a later ipx_release(newsock) takes its sk==NULL early return instead of touching freed memory. Always returns -EOPNOTSUPP. */ +static int ipx_accept(struct socket *sock, struct socket *newsock, int flags) +{ + if(newsock->data) { + kfree_s(newsock->data,sizeof(ipx_socket)); + newsock->data=NULL; + } + return -EOPNOTSUPP; +} + /* Report the socket's peer address (peer != 0, requires TCP_ESTABLISHED) or its own address into uaddr, and store sizeof(struct sockaddr_ipx) in *uaddr_len. */ +static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + ipx_address *addr; + struct sockaddr_ipx sipx; + ipx_socket *sk; + + sk=(ipx_socket *)sock->data; + + *uaddr_len = sizeof(struct sockaddr_ipx); + + if(peer) { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + addr=&sk->ipx_dest_addr; + sipx.sipx_network = addr->net; + 
memcpy(sipx.sipx_node,addr->node,IPX_NODE_LEN); + sipx.sipx_port = addr->sock; + } else { + if (sk->ipx_intrfc != NULL) { + sipx.sipx_network = sk->ipx_intrfc->if_netnum; + memcpy(sipx.sipx_node, sk->ipx_intrfc->if_node, + IPX_NODE_LEN); + } else { + sipx.sipx_network = 0L; + memset(sipx.sipx_node, '\0', IPX_NODE_LEN); + } + sipx.sipx_port = sk->ipx_port; + } + + sipx.sipx_family = AF_IPX; + sipx.sipx_type = sk->ipx_type; + memcpy(uaddr,&sipx,sizeof(sipx)); + return 0; +} + +#if 0 +/* + * Used to dump IPX packets (debugging). dump_data prints 16 rows of 8 bytes: + * each byte as two hex digits, then the same bytes as printable characters + * ('.' for anything outside ' '..'~'). + */ +void dump_data(char *str,unsigned char *d) { + static char h2c[] = "0123456789ABCDEF"; + int l,i; + char *p, b[64]; + for (l=0;l<16;l++) { + p = b; + for (i=0; i < 8 ; i++) { + /* High nibble first, so 0x12 prints as "12"; the digits used to come out swapped */ + *(p++) = h2c[(d[i] >> 4) & 0x0f]; + *(p++) = h2c[d[i] & 0x0f]; + *(p++) = ' '; + } + *(p++) = '-'; + *(p++) = ' '; + for (i=0; i < 8 ; i++) *(p++) = ' '<= d[i] && d[i]<'\177' ? d[i] : '.'; + *p = '\000'; + d += i; + printk("%s-%04X: %s\n",str,l*8,b); + } +} + /* Print an IPX address as net:node:sock. */ +void dump_addr(char *str,ipx_address *p) { + printk("%s: %08X:%02X%02X%02X%02X%02X%02X:%04X\n", + str,ntohl(p->net),p->node[0],p->node[1],p->node[2], + p->node[3],p->node[4],p->node[5],ntohs(p->sock)); +} + /* Print the fixed IPX header fields followed by both addresses. */ +void dump_hdr(char *str,ipx_packet *p) { + printk("%s: CHKSUM=%04X SIZE=%d (%04X) HOPS=%d (%02X) TYPE=%02X\n", + str,p->ipx_checksum,ntohs(p->ipx_pktsize),ntohs(p->ipx_pktsize), + p->ipx_tctrl,p->ipx_tctrl,p->ipx_type); + dump_addr(" IPX-DST",&p->ipx_dest); + dump_addr(" IPX-SRC",&p->ipx_source); +} + /* Print header and raw bytes of one packet. */ +void dump_pkt(char *str,ipx_packet *p) { + dump_hdr(str,p); + dump_data(str,(unsigned char *)p); +} +#endif + +int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + /* NULL here for pt means the packet was looped back */ + ipx_interface *intrfc; + ipx_packet *ipx; + + ipx=(ipx_packet *)skb->h.raw; + + if(ipx->ipx_checksum!=IPX_NO_CHECKSUM) { + /* We don't do checksum options. We can't really. Novell don't seem to have documented them. 
+ If you need them try the XNS checksum since IPX is basically XNS in disguise. It might be + the same... */ + kfree_skb(skb,FREE_READ); + return 0; + } + + /* Too small */ + if(htons(ipx->ipx_pktsize)<sizeof(ipx_packet)) { + kfree_skb(skb,FREE_READ); + return 0; + } + + /* Determine what local ipx endpoint this is */ + /* NOTE(review): pt is dereferenced unconditionally from here on - confirm no caller passes NULL */ intrfc = ipxitf_find_using_phys(dev, pt->type); + if (intrfc == NULL) { + if (ipxcfg_auto_create_interfaces) { + intrfc = ipxitf_auto_create(dev, pt->type); + } + + if (intrfc == NULL) { + /* Not one of ours */ + kfree_skb(skb,FREE_READ); + return 0; + } + } + + return ipxitf_rcv(intrfc, skb); +} + /* Send len bytes from ubuf as one IPX datagram. With usip the destination comes from the caller (it must be an AF_IPX address of at least sizeof(struct sockaddr_ipx) bytes); without it the connected destination is used (-ENOTCONN when not connected). Flags are not supported. Returns len on success or a negative errno. */ +static int ipx_sendto(struct socket *sock, void *ubuf, int len, int noblock, + unsigned flags, struct sockaddr *usip, int addr_len) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *usipx=(struct sockaddr_ipx *)usip; + struct sockaddr_ipx local_sipx; + int retval; + + if (sk->zapped) return -EIO; /* Socket not bound */ + if(flags) return -EINVAL; + + if(usipx) { + /* Validate the caller's address before acting on it. The cast keeps the comparison signed, so a negative addr_len is rejected instead of being converted to a huge unsigned value that passes the test. */ + if(addr_len < (int)sizeof(*usipx)) + return -EINVAL; + if(usipx->sipx_family != AF_IPX) + return -EINVAL; + + if(sk->ipx_port == 0) { + struct sockaddr_ipx uaddr; + int ret; + + uaddr.sipx_port = 0; + uaddr.sipx_network = 0L; + ret = ipx_bind (sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); + if (ret != 0) return ret; + } + } else { + if(sk->state!=TCP_ESTABLISHED) + return -ENOTCONN; + usipx=&local_sipx; + usipx->sipx_family=AF_IPX; + usipx->sipx_type=sk->ipx_type; + usipx->sipx_port=sk->ipx_dest_addr.sock; + usipx->sipx_network=sk->ipx_dest_addr.net; + memcpy(usipx->sipx_node,sk->ipx_dest_addr.node,IPX_NODE_LEN); + } + + retval = ipxrtr_route_packet(sk, usipx, ubuf, len); + if (retval < 0) return retval; + + return len; +} + /* send() is sendto() without a destination address. */ +static int ipx_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) +{ + return ipx_sendto(sock,ubuf,size,noblock,flags,NULL,0); +} + +static int ipx_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + 
unsigned flags, struct sockaddr *sip, int *addr_len) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + struct sockaddr_ipx *sipx=(struct sockaddr_ipx *)sip; + struct ipx_packet *ipx = NULL; + int copied = 0; + int truesize; + struct sk_buff *skb; + int er; + + if(sk->err) + { + er= -sk->err; + sk->err=0; + return er; + } + + if (sk->zapped) + return -EIO; + + if(addr_len) + *addr_len=sizeof(*sipx); + + skb=skb_recv_datagram(sk,flags,noblock,&er); + if(skb==NULL) + return er; + + ipx = (ipx_packet *)(skb->h.raw); + truesize=ntohs(ipx->ipx_pktsize) - sizeof(ipx_packet); + copied = (truesize > size) ? size : truesize; + skb_copy_datagram(skb,sizeof(struct ipx_packet),ubuf,copied); + + if(sipx) + { + sipx->sipx_family=AF_IPX; + sipx->sipx_port=ipx->ipx_source.sock; + memcpy(sipx->sipx_node,ipx->ipx_source.node,IPX_NODE_LEN); + sipx->sipx_network=ipx->ipx_source.net; + sipx->sipx_type = ipx->ipx_type; + } + skb_free_datagram(skb); + return(truesize); +} + +static int ipx_write(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ipx_send(sock,ubuf,size,noblock,0); +} + + +static int ipx_recv(struct socket *sock, void *ubuf, int size , int noblock, + unsigned flags) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + if(sk->zapped) + return -ENOTCONN; + return ipx_recvfrom(sock,ubuf,size,noblock,flags,NULL, NULL); +} + +static int ipx_read(struct socket *sock, char *ubuf, int size, int noblock) +{ + return ipx_recv(sock,ubuf,size,noblock,0); +} + + +static int ipx_shutdown(struct socket *sk,int how) +{ + return -EOPNOTSUPP; +} + +static int ipx_select(struct socket *sock , int sel_type, select_table *wait) +{ + ipx_socket *sk=(ipx_socket *)sock->data; + + return datagram_select(sk,sel_type,wait); +} + +static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) +{ + int err; + long amount=0; + ipx_socket *sk=(ipx_socket *)sock->data; + + switch(cmd) + { + case TIOCOUTQ: + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + 
if(err) + return err; + amount=sk->sndbuf-sk->wmem_alloc; + if(amount<0) + amount=0; + put_fs_long(amount,(unsigned long *)arg); + return 0; + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if((skb=skb_peek(&sk->receive_queue))!=NULL) + amount=skb->len; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + put_fs_long(amount,(unsigned long *)arg); + return 0; + } + case SIOCADDRT: + case SIOCDELRT: + if(!suser()) + return -EPERM; + return(ipxrtr_ioctl(cmd,(void *)arg)); + case SIOCSIFADDR: + case SIOCGIFADDR: + case SIOCAIPXITFCRT: + case SIOCAIPXPRISLT: + if(!suser()) + return -EPERM; + return(ipxitf_ioctl(cmd,(void *)arg)); + case SIOCIPXCFGDATA: + { + err=verify_area(VERIFY_WRITE,(void *)arg, + sizeof(ipx_config_data)); + if(err) return err; + return(ipxcfg_get_config_data((void *)arg)); + } + case SIOCGSTAMP: + if (sk) + { + if(sk->stamp.tv_sec==0) + return -ENOENT; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); + if(err) + return err; + memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); + return 0; + } + return -EINVAL; + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + return -EINVAL; + default: + return(dev_ioctl(cmd,(void *) arg)); + } + /*NOTREACHED*/ + return(0); +} + +static struct proto_ops ipx_proto_ops = { + AF_IPX, + + ipx_create, + ipx_dup, + ipx_release, + ipx_bind, + ipx_connect, + ipx_socketpair, + ipx_accept, + ipx_getname, + ipx_read, + ipx_write, + ipx_select, + ipx_ioctl, + ipx_listen, + ipx_send, + ipx_recv, + ipx_sendto, + ipx_recvfrom, + ipx_shutdown, + ipx_setsockopt, + ipx_getsockopt, + ipx_fcntl, +}; + +/* Called by ddi.c on kernel start up */ + +static struct packet_type ipx_8023_packet_type = + +{ + 0, /* MUTTER ntohs(ETH_P_8023),*/ + NULL, /* All devices */ + ipx_rcv, + NULL, + NULL, +}; + +static struct packet_type 
ipx_dix_packet_type = +{ + 0, /* MUTTER ntohs(ETH_P_IPX),*/ + NULL, /* All devices */ + ipx_rcv, + NULL, + NULL, +}; + /* Notifier block whose handler is ipxitf_device_event. */ +static struct notifier_block ipx_dev_notifier={ + ipxitf_device_event, + NULL, + 0 +}; + + +extern struct datalink_proto *make_EII_client(void); +extern struct datalink_proto *make_8023_client(void); + /* Register the IPX socket family, set up the four datalink clients (Ethernet II, 802.3, 802.2 and SNAP) with ipx_rcv as receive handler, fill in the packet type values left as 0 in the static tables above, and register the device notifier. A failed 802.2 or SNAP registration is only logged; the corresponding *_datalink pointer then stays NULL. */ +void ipx_proto_init(struct net_proto *pro) +{ + unsigned char val = 0xE0; + unsigned char snapval[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; + + (void) sock_register(ipx_proto_ops.family, &ipx_proto_ops); + + pEII_datalink = make_EII_client(); + ipx_dix_packet_type.type=htons(ETH_P_IPX); + dev_add_pack(&ipx_dix_packet_type); + + p8023_datalink = make_8023_client(); + ipx_8023_packet_type.type=htons(ETH_P_802_3); + dev_add_pack(&ipx_8023_packet_type); + + if ((p8022_datalink = register_8022_client(val, ipx_rcv)) == NULL) + printk("IPX: Unable to register with 802.2\n"); + + if ((pSNAP_datalink = register_snap_client(snapval, ipx_rcv)) == NULL) + printk("IPX: Unable to register with SNAP\n"); + + register_netdevice_notifier(&ipx_dev_notifier); + + printk("Swansea University Computer Society IPX 0.29 BETA for NET3.019\n"); + printk("IPX Portions Copyright (c) 1995 Caldera, Inc.\n"); +} +#endif diff --git a/pfinet/linux-inet/ipx.h b/pfinet/linux-inet/ipx.h new file mode 100644 index 00000000..6842c832 --- /dev/null +++ b/pfinet/linux-inet/ipx.h @@ -0,0 +1,84 @@ + +/* + * The following information is in its entirety obtained from: + * + * Novell 'IPX Router Specification' Version 1.10 + * Part No. 
107-000029-001 + * + * Which is available from ftp.novell.com + */ + +#ifndef _NET_INET_IPX_H_ +#define _NET_INET_IPX_H_ + +#include <linux/skbuff.h> +#include "datalink.h" +#include <linux/ipx.h> + /* An IPX endpoint address: network number, node address and socket number. */ +typedef struct +{ + unsigned long net; + unsigned char node[IPX_NODE_LEN]; + unsigned short sock; +} ipx_address; + +#define ipx_broadcast_node "\377\377\377\377\377\377" + /* IPX packet header; the payload follows it directly (the send path copies user data to ipx+1). */ +typedef struct ipx_packet +{ + unsigned short ipx_checksum; +#define IPX_NO_CHECKSUM 0xFFFF + unsigned short ipx_pktsize; + unsigned char ipx_tctrl; + unsigned char ipx_type; +#define IPX_TYPE_UNKNOWN 0x00 +#define IPX_TYPE_RIP 0x01 /* may also be 0 */ +#define IPX_TYPE_SAP 0x04 /* may also be 0 */ +#define IPX_TYPE_SPX 0x05 /* Not yet implemented */ +#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */ +#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast [Not supported] */ + ipx_address ipx_dest __attribute__ ((packed)); + ipx_address ipx_source __attribute__ ((packed)); +} ipx_packet; + /* An IPX socket is a plain struct sock. */ + +typedef struct sock ipx_socket; + +#include "ipxcall.h" +extern int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt); +extern void ipxrtr_device_down(struct device *dev); + /* One IPX interface: an IPX network/node address bound to a device and frame type, plus the list of sockets attached to it. Interfaces are chained through if_next. */ +typedef struct ipx_interface { + /* IPX address */ + unsigned long if_netnum; + unsigned char if_node[IPX_NODE_LEN]; + + /* physical device info */ + struct device *if_dev; + struct datalink_proto *if_dlink; + unsigned short if_dlink_type; + + /* socket support */ + unsigned short if_sknum; + ipx_socket *if_sklist; + + /* administrative overhead */ + int if_ipx_offset; + unsigned char if_internal; + unsigned char if_primary; + + struct ipx_interface *if_next; +} ipx_interface; + /* One routing table entry: destination network, outgoing interface, and (when ir_routed is non-zero) the node address of the router to send through. Entries are chained through ir_next. */ +typedef struct ipx_route { + unsigned long ir_net; + ipx_interface *ir_intrfc; + unsigned char ir_routed; + unsigned char ir_router_node[IPX_NODE_LEN]; + struct ipx_route *ir_next; +} ipx_route; + /* Inclusive range of socket numbers handed out when a caller binds to port 0. */ +#define IPX_MIN_EPHEMERAL_SOCKET 0x4000 +#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff + +#endif diff --git 
a/pfinet/linux-inet/ipxcall.h b/pfinet/linux-inet/ipxcall.h new file mode 100644 index 00000000..eb5bd2bd --- /dev/null +++ b/pfinet/linux-inet/ipxcall.h @@ -0,0 +1,2 @@ +/* Separate to keep compilation of protocols.c simpler */ +extern void ipx_proto_init(struct net_proto *pro); diff --git a/pfinet/linux-inet/p8022.c b/pfinet/linux-inet/p8022.c new file mode 100644 index 00000000..8ff3ec60 --- /dev/null +++ b/pfinet/linux-inet/p8022.c @@ -0,0 +1,98 @@ +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "datalink.h" +#include <linux/mm.h> +#include <linux/in.h> + /* Registered 802.2 clients, newest first. */ +static struct datalink_proto *p8022_list = NULL; + /* Return the registered client whose first type byte equals type, or NULL. */ +static struct datalink_proto * +find_8022_client(unsigned char type) +{ + struct datalink_proto *proto; + + for (proto = p8022_list; + ((proto != NULL) && (*(proto->type) != type)); + proto = proto->next) + ; + + return proto; +} + /* Receive handler for 802.2 frames: the first byte at skb->h.raw selects the client; on a match the 3-byte header is skipped and the client's rcvfunc is called, otherwise the skb is freed. */ +int +p8022_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct datalink_proto *proto; + + proto = find_8022_client(*(skb->h.raw)); + if (proto != NULL) { + skb->h.raw += 3; + skb->len -= 3; + return proto->rcvfunc(skb, dev, pt); + } + + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return 0; +} + /* Build the outgoing headers: the device's hard header first, then three bytes - the client type byte written twice (presumably DSAP and SSAP - confirm) followed by 0x03 - and leave skb->h.raw pointing just past them. */ +static void +p8022_datalink_header(struct datalink_proto *dl, + struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + unsigned long len = skb->len; + unsigned long hard_len = dev->hard_header_len; + unsigned char *rawp; + + dev->hard_header(skb->data, dev, len - hard_len, + dest_node, NULL, len - hard_len, skb); + rawp = skb->data + hard_len; + *rawp = dl->type[0]; + rawp++; + *rawp = dl->type[0]; + rawp++; + *rawp = 0x03; /* UI */ + rawp++; + skb->h.raw = rawp; +} + /* The type field is left 0 here and filled in by p8022_proto_init(). */ +static struct packet_type p8022_packet_type = +{ + 0, /* MUTTER ntohs(ETH_P_IPX),*/ + NULL, /* All devices */ + p8022_rcv, + NULL, + NULL, +}; + /* Set the packet type to ETH_P_802_2 and register p8022_rcv for it. */ + +void p8022_proto_init(struct net_proto *pro) +{ + p8022_packet_type.type=htons(ETH_P_802_2); + dev_add_pack(&p8022_packet_type); +} + +struct 
datalink_proto * +register_8022_client(unsigned char type, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)) +{ /* Register rcvfunc as the handler for 802.2 frames of the given type. Returns the new client record, or NULL when the type is already registered or the allocation fails. */ + struct datalink_proto *proto; + + if (find_8022_client(type) != NULL) + return NULL; + + proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) { + proto->type[0] = type; + proto->type_len = 1; + proto->rcvfunc = rcvfunc; + proto->header_length = 3; + proto->datalink_header = p8022_datalink_header; + proto->string_name = "802.2"; + proto->next = p8022_list; + p8022_list = proto; + } + + return proto; +} + diff --git a/pfinet/linux-inet/p8022.h b/pfinet/linux-inet/p8022.h new file mode 100644 index 00000000..52c676be --- /dev/null +++ b/pfinet/linux-inet/p8022.h @@ -0,0 +1,2 @@ +struct datalink_proto *register_8022_client(unsigned char type, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)); + diff --git a/pfinet/linux-inet/p8022call.h b/pfinet/linux-inet/p8022call.h new file mode 100644 index 00000000..14f0c2ce --- /dev/null +++ b/pfinet/linux-inet/p8022call.h @@ -0,0 +1,2 @@ +/* Separate to keep compilation of Space.c simpler */ +extern void p8022_proto_init(struct net_proto *); diff --git a/pfinet/linux-inet/p8023.c b/pfinet/linux-inet/p8023.c new file mode 100644 index 00000000..7c76223d --- /dev/null +++ b/pfinet/linux-inet/p8023.c @@ -0,0 +1,35 @@ +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "datalink.h" +#include <linux/mm.h> +#include <linux/in.h> + /* Build the outgoing header for raw 802.3: only the device's hard header is written, and skb->h.raw is left pointing just past it. */ +static void +p8023_datalink_header(struct datalink_proto *dl, + struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + unsigned long len = skb->len; + unsigned long hard_len = dev->hard_header_len; + + dev->hard_header(skb->data, dev, len - hard_len, + dest_node, NULL, len - hard_len, skb); + skb->h.raw = skb->data + hard_len; +} + /* Allocate the datalink record for raw 802.3 (no extra header bytes). Returns NULL when the allocation fails. NOTE(review): type, rcvfunc and next are left as kmalloc() returned them - confirm nothing reads those fields for this client. */ +struct datalink_proto * +make_8023_client(void) +{ + struct datalink_proto *proto; + + proto = (struct datalink_proto *) 
kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) { + proto->type_len = 0; + proto->header_length = 0; + proto->datalink_header = p8023_datalink_header; + proto->string_name = "802.3"; + } + + return proto; +} + diff --git a/pfinet/linux-inet/packet.c b/pfinet/linux-inet/packet.c new file mode 100644 index 00000000..ab031c81 --- /dev/null +++ b/pfinet/linux-inet/packet.c @@ -0,0 +1,410 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * PACKET - implements raw packet sockets. + * + * Version: @(#)packet.c 1.0.6 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * + * Fixes: + * Alan Cox : verify_area() now used correctly + * Alan Cox : new skbuff lists, look ma no backlogs! + * Alan Cox : tidied skbuff lists. + * Alan Cox : Now uses generic datagram routines I + * added. Also fixed the peek/read crash + * from all old Linux datagram code. + * Alan Cox : Uses the improved datagram code. + * Alan Cox : Added NULL's for socket options. + * Alan Cox : Re-commented the code. + * Alan Cox : Use new kernel side addressing + * Rob Janssen : Correct MTU usage. + * Dave Platt : Counter leaks caused by incorrect + * interrupt locking and some slightly + * dubious gcc output. Can you read + * compiler: it said _VOLATILE_ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include <linux/skbuff.h> +#include "sock.h" +#include <linux/errno.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <asm/segment.h> + +/* + * We really ought to have a single public _inline_ min function! + */ + +static unsigned long min(unsigned long a, unsigned long b) +{ + if (a < b) + return(a); + return(b); +} + + +/* + * This should be the easiest of all, all we do is copy it into a buffer. + */ + +int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct sock *sk; + unsigned long flags; + + /* + * When we registered the protocol we saved the socket in the data + * field for just this event. + */ + + sk = (struct sock *) pt->data; + + /* + * The SOCK_PACKET socket receives _all_ frames, and as such + * therefore needs to put the header back onto the buffer. + * (it was removed by inet_bh()). + */ + + skb->dev = dev; + skb->len += dev->hard_header_len; + + /* + * Charge the memory to the socket. This is done specifically + * to prevent sockets using all the memory up. + */ + + if (sk->rmem_alloc & 0xFF000000) { + printk("packet_rcv: sk->rmem_alloc = %ld\n", sk->rmem_alloc); + sk->rmem_alloc = 0; + } + + if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) + { +/* printk("packet_rcv: drop, %d+%d>%d\n", sk->rmem_alloc, skb->mem_len, sk->rcvbuf); */ + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return(0); + } + + save_flags(flags); + cli(); + + skb->sk = sk; + sk->rmem_alloc += skb->mem_len; + + /* + * Queue the packet up, and wake anyone waiting for it. + */ + + skb_queue_tail(&sk->receive_queue,skb); + if(!sk->dead) + sk->data_ready(sk,skb->len); + + restore_flags(flags); + + /* + * Processing complete. 
+ */ + + release_sock(sk); /* This is now effectively surplus in this layer */ + return(0); +} + + +/* + * Output a raw packet to a device layer. This bypasses all the other + * protocol layers and you must therefore supply it with a complete frame + */ + +static int packet_sendto(struct sock *sk, unsigned char *from, int len, + int noblock, unsigned flags, struct sockaddr_in *usin, + int addr_len) +{ + struct sk_buff *skb; + struct device *dev; + struct sockaddr *saddr=(struct sockaddr *)usin; + + /* + * Check the flags. + */ + + if (flags) + return(-EINVAL); + + /* + * Get and verify the address. + */ + + if (usin) + { + if (addr_len < sizeof(*saddr)) + return(-EINVAL); + } + else + return(-EINVAL); /* SOCK_PACKET must be sent giving an address */ + + /* + * Find the device first to size check it + */ + + saddr->sa_data[13] = 0; + dev = dev_get(saddr->sa_data); + if (dev == NULL) + { + return(-ENXIO); + } + + /* + * You may not queue a frame bigger than the mtu. This is the lowest level + * raw protocol and you must do your own fragmentation at this level. + */ + + if(len>dev->mtu+dev->hard_header_len) + return -EMSGSIZE; + + skb = sk->prot->wmalloc(sk, len, 0, GFP_KERNEL); + + /* + * If the write buffer is full, then tough. At this level the user gets to + * deal with the problem - do your own algorithmic backoffs. + */ + + if (skb == NULL) + { + return(-ENOBUFS); + } + + /* + * Fill it in + */ + + skb->sk = sk; + skb->free = 1; + memcpy_fromfs(skb->data, from, len); + skb->len = len; + skb->arp = 1; /* No ARP needs doing on this (complete) frame */ + + /* + * Now send it + */ + + if (dev->flags & IFF_UP) + dev_queue_xmit(skb, dev, sk->priority); + else + kfree_skb(skb, FREE_WRITE); + return(len); +} + +/* + * A write to a SOCK_PACKET can't actually do anything useful and will + * always fail but we include it for completeness and future expansion. 
+ */ + +static int packet_write(struct sock *sk, unsigned char *buff, + int len, int noblock, unsigned flags) +{ + return(packet_sendto(sk, buff, len, noblock, flags, NULL, 0)); +} + +/* + * Close a SOCK_PACKET socket. This is fairly simple. We immediately go + * to 'closed' state and remove our protocol entry in the device list. + * The release_sock() will destroy the socket if a user has closed the + * file side of the object. + */ + +static void packet_close(struct sock *sk, int timeout) +{ + sk->inuse = 1; + sk->state = TCP_CLOSE; + dev_remove_pack((struct packet_type *)sk->pair); + kfree_s((void *)sk->pair, sizeof(struct packet_type)); + sk->pair = NULL; + release_sock(sk); +} + +/* + * Create a packet of type SOCK_PACKET. We do one slightly irregular + * thing here that wants tidying up. We borrow the 'pair' pointer in + * the socket object so we can find the packet_type entry in the + * device list. The reverse is easy as we use the data field of the + * packet type to point to our socket. + */ + +static int packet_init(struct sock *sk) +{ + struct packet_type *p; + + p = (struct packet_type *) kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return(-ENOMEM); + + p->func = packet_rcv; + p->type = sk->num; + p->data = (void *)sk; + p->dev = NULL; + dev_add_pack(p); + + /* + * We need to remember this somewhere. + */ + + sk->pair = (struct sock *)p; + + return(0); +} + + +/* + * Pull a packet from our receive queue and hand it to the user. + * If necessary we block. + */ + +int packet_recvfrom(struct sock *sk, unsigned char *to, int len, + int noblock, unsigned flags, struct sockaddr_in *sin, + int *addr_len) +{ + int copied=0; + struct sk_buff *skb; + struct sockaddr *saddr; + int err; + int truesize; + + saddr = (struct sockaddr *)sin; + + if (sk->shutdown & RCV_SHUTDOWN) + return(0); + + /* + * If the address length field is there to be filled in, we fill + * it in now. 
+ */ + + if (addr_len) + *addr_len=sizeof(*saddr); + + /* + * Call the generic datagram receiver. This handles all sorts + * of horrible races and re-entrancy so we can forget about it + * in the protocol layers. + */ + + skb=skb_recv_datagram(sk,flags,noblock,&err); + + /* + * An error occurred so return it. Because skb_recv_datagram() + * handles the blocking we don't see and worry about blocking + * retries. + */ + + if(skb==NULL) + return err; + + /* + * You lose any data beyond the buffer you gave. If it worries a + * user program they can ask the device for its MTU anyway. + */ + + truesize = skb->len; + copied = min(len, truesize); + + memcpy_tofs(to, skb->data, copied); /* We can't use skb_copy_datagram here */ + + /* + * Copy the address. + */ + + if (saddr) + { + saddr->sa_family = skb->dev->type; + memcpy(saddr->sa_data,skb->dev->name, 14); + } + + /* + * Free or return the buffer as appropriate. Again this hides all the + * races and re-entrancy issues from us. + */ + + skb_free_datagram(skb); + + /* + * We are done. + */ + + release_sock(sk); + return(truesize); +} + + +/* + * A packet read can succeed and is just the same as a recvfrom but without the + * addresses being recorded. + */ + +int packet_read(struct sock *sk, unsigned char *buff, + int len, int noblock, unsigned flags) +{ + return(packet_recvfrom(sk, buff, len, noblock, flags, NULL, NULL)); +} + + +/* + * This structure declares to the lower layer socket subsystem currently + * incorrectly embedded in the IP code how to behave. This interface needs + * a lot of work and will change. 
+ */ + +struct proto packet_prot = +{ + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + packet_close, + packet_read, + packet_write, + packet_sendto, + packet_recvfrom, + ip_build_header, /* Not actually used */ + NULL, + NULL, + ip_queue_xmit, /* These two are not actually used */ + NULL, + NULL, + NULL, + NULL, + datagram_select, + NULL, + packet_init, + NULL, + NULL, /* No set/get socket options */ + NULL, + 128, + 0, + {NULL,}, + "PACKET", + 0, 0 +}; diff --git a/pfinet/linux-inet/pe2.c b/pfinet/linux-inet/pe2.c new file mode 100644 index 00000000..856e454b --- /dev/null +++ b/pfinet/linux-inet/pe2.c @@ -0,0 +1,35 @@ +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "datalink.h" +#include <linux/mm.h> +#include <linux/in.h> + +static void +pEII_datalink_header(struct datalink_proto *dl, + struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + unsigned long len = skb->len; + unsigned long hard_len = dev->hard_header_len; + + dev->hard_header(skb->data, dev, ETH_P_IPX, + dest_node, NULL, len - hard_len, skb); + skb->h.raw = skb->data + hard_len; +} + +struct datalink_proto * +make_EII_client(void) +{ + struct datalink_proto *proto; + + proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) { + proto->type_len = 0; + proto->header_length = 0; + proto->datalink_header = pEII_datalink_header; + proto->string_name = "EtherII"; + } + + return proto; +} + diff --git a/pfinet/linux-inet/proc.c b/pfinet/linux-inet/proc.c new file mode 100644 index 00000000..aec473a2 --- /dev/null +++ b/pfinet/linux-inet/proc.c @@ -0,0 +1,257 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * This file implements the various access functions for the + * PROC file system. 
It is mainly used for debugging and + * statistics. + * + * Version: @(#)proc.c 1.0.5 05/27/93 + * + * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> + * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> + * Erik Schoenfelder, <schoenfr@ibr.cs.tu-bs.de> + * + * Fixes: + * Alan Cox : UDP sockets show the rxqueue/txqueue + * using hint flag for the netinfo. + * Pauline Middelink : identd support + * Alan Cox : Make /proc safer. + * Erik Schoenfelder : /proc/net/snmp + * Alan Cox : Handle dead sockets properly. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/system.h> +#include <linux/autoconf.h> +#include <linux/sched.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/un.h> +#include <linux/in.h> +#include <linux/param.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "icmp.h" +#include "protocol.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "raw.h" + +/* + * Get__netinfo returns the length of that string. + * + * KNOWN BUGS + * As in get_unix_netinfo, the buffer might be too small. If this + * happens, get__netinfo returns only part of the available infos. 
+ */ +static int +get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length) +{ + struct sock **s_array; + struct sock *sp; + int i; + int timer_active; + unsigned long dest, src; + unsigned short destp, srcp; + int len=0; + off_t pos=0; + off_t begin=0; + + s_array = pro->sock_array; + len+=sprintf(buffer, "sl local_address rem_address st tx_queue rx_queue tr tm->when uid\n"); +/* + * This was very pretty but didn't work when a socket is destroyed at the wrong moment + * (eg a syn recv socket getting a reset), or a memory timer destroy. Instead of playing + * with timers we just concede defeat and cli(). + */ + for(i = 0; i < SOCK_ARRAY_SIZE; i++) + { + cli(); + sp = s_array[i]; + while(sp != NULL) + { + dest = sp->daddr; + src = sp->saddr; + destp = sp->dummy_th.dest; + srcp = sp->dummy_th.source; + + /* Since we are Little Endian we need to swap the bytes :-( */ + destp = ntohs(destp); + srcp = ntohs(srcp); + timer_active = del_timer(&sp->timer); + if (!timer_active) + sp->timer.expires = 0; + len+=sprintf(buffer+len, "%2d: %08lX:%04X %08lX:%04X %02X %08lX:%08lX %02X:%08lX %08X %d %d\n", + i, src, srcp, dest, destp, sp->state, + format==0?sp->write_seq-sp->rcv_ack_seq:sp->rmem_alloc, + format==0?sp->acked_seq-sp->copied_seq:sp->wmem_alloc, + timer_active, sp->timer.expires, (unsigned) sp->retransmits, + sp->socket?SOCK_INODE(sp->socket)->i_uid:0, + timer_active?sp->timeout:0); + if (timer_active) + add_timer(&sp->timer); + /* + * All sockets with (port mod SOCK_ARRAY_SIZE) = i + * are kept in sock_array[i], so we must follow the + * 'next' link to get them all. 
+ */ + sp = sp->next; + pos=begin+len; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + sti(); /* We only turn interrupts back on for a moment, but because the interrupt queues anything built up + before this will clear before we jump back and cli, so it's not as bad as it looks */ + if(pos>offset+length) + break; + } + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + + +int tcp_get_info(char *buffer, char **start, off_t offset, int length) +{ + return get__netinfo(&tcp_prot, buffer,0, start, offset, length); +} + + +int udp_get_info(char *buffer, char **start, off_t offset, int length) +{ + return get__netinfo(&udp_prot, buffer,1, start, offset, length); +} + + +int raw_get_info(char *buffer, char **start, off_t offset, int length) +{ + return get__netinfo(&raw_prot, buffer,1, start, offset, length); +} + + +/* + * Report socket allocation statistics [mea@utu.fi] + */ +int afinet_get_info(char *buffer, char **start, off_t offset, int length) +{ + /* From net/socket.c */ + extern int socket_get_info(char *, char **, off_t, int); +#ifndef _HURD_ + extern struct proto packet_prot; + int len = socket_get_info(buffer,start,offset,length); +#else + int len = 0; +#endif + + + len += sprintf(buffer+len,"SOCK_ARRAY_SIZE=%d\n",SOCK_ARRAY_SIZE); + len += sprintf(buffer+len,"TCP: inuse %d highest %d\n", + tcp_prot.inuse, tcp_prot.highestinuse); + len += sprintf(buffer+len,"UDP: inuse %d highest %d\n", + udp_prot.inuse, udp_prot.highestinuse); + len += sprintf(buffer+len,"RAW: inuse %d highest %d\n", + raw_prot.inuse, raw_prot.highestinuse); +#ifndef _HURD_ + len += sprintf(buffer+len,"PAC: inuse %d highest %d\n", + packet_prot.inuse, packet_prot.highestinuse); +#endif + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + return len; +} + + +/* + * Called from the PROCfs module. This outputs /proc/net/snmp. 
+ */ + +int snmp_get_info(char *buffer, char **start, off_t offset, int length) +{ + extern struct tcp_mib tcp_statistics; + extern struct udp_mib udp_statistics; + int len; +/* + extern unsigned long tcp_rx_miss, tcp_rx_hit1,tcp_rx_hit2; +*/ + + len = sprintf (buffer, + "Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates\n" + "Ip: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + ip_statistics.IpForwarding, ip_statistics.IpDefaultTTL, + ip_statistics.IpInReceives, ip_statistics.IpInHdrErrors, + ip_statistics.IpInAddrErrors, ip_statistics.IpForwDatagrams, + ip_statistics.IpInUnknownProtos, ip_statistics.IpInDiscards, + ip_statistics.IpInDelivers, ip_statistics.IpOutRequests, + ip_statistics.IpOutDiscards, ip_statistics.IpOutNoRoutes, + ip_statistics.IpReasmTimeout, ip_statistics.IpReasmReqds, + ip_statistics.IpReasmOKs, ip_statistics.IpReasmFails, + ip_statistics.IpFragOKs, ip_statistics.IpFragFails, + ip_statistics.IpFragCreates); + + len += sprintf (buffer + len, + "Icmp: InMsgs InErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps\n" + "Icmp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + icmp_statistics.IcmpInMsgs, icmp_statistics.IcmpInErrors, + icmp_statistics.IcmpInDestUnreachs, icmp_statistics.IcmpInTimeExcds, + icmp_statistics.IcmpInParmProbs, icmp_statistics.IcmpInSrcQuenchs, + icmp_statistics.IcmpInRedirects, icmp_statistics.IcmpInEchos, + icmp_statistics.IcmpInEchoReps, icmp_statistics.IcmpInTimestamps, + 
icmp_statistics.IcmpInTimestampReps, icmp_statistics.IcmpInAddrMasks, + icmp_statistics.IcmpInAddrMaskReps, icmp_statistics.IcmpOutMsgs, + icmp_statistics.IcmpOutErrors, icmp_statistics.IcmpOutDestUnreachs, + icmp_statistics.IcmpOutTimeExcds, icmp_statistics.IcmpOutParmProbs, + icmp_statistics.IcmpOutSrcQuenchs, icmp_statistics.IcmpOutRedirects, + icmp_statistics.IcmpOutEchos, icmp_statistics.IcmpOutEchoReps, + icmp_statistics.IcmpOutTimestamps, icmp_statistics.IcmpOutTimestampReps, + icmp_statistics.IcmpOutAddrMasks, icmp_statistics.IcmpOutAddrMaskReps); + + len += sprintf (buffer + len, + "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs\n" + "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + tcp_statistics.TcpRtoAlgorithm, tcp_statistics.TcpRtoMin, + tcp_statistics.TcpRtoMax, tcp_statistics.TcpMaxConn, + tcp_statistics.TcpActiveOpens, tcp_statistics.TcpPassiveOpens, + tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets, + tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs, + tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs); + + len += sprintf (buffer + len, + "Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n", + udp_statistics.UdpInDatagrams, udp_statistics.UdpNoPorts, + udp_statistics.UdpInErrors, udp_statistics.UdpOutDatagrams); +/* + len += sprintf( buffer + len, + "TCP fast path RX: H2: %ul H1: %ul L: %ul\n", + tcp_rx_hit2,tcp_rx_hit1,tcp_rx_miss); +*/ + + if (offset >= len) + { + *start = buffer; + return 0; + } + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + return len; +} + diff --git a/pfinet/linux-inet/protocol.c b/pfinet/linux-inet/protocol.c new file mode 100644 index 00000000..a47d27cd --- /dev/null +++ b/pfinet/linux-inet/protocol.c @@ -0,0 +1,177 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * INET protocol dispatch tables. + * + * Version: @(#)protocol.c 1.0.5 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : Ahah! udp icmp errors don't work because + * udp_err is never called! + * Alan Cox : Added new fields for init and ready for + * proper fragmentation (_NO_ 4K limits!) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/timer.h> +#include "ip.h" +#include "protocol.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "icmp.h" +#include "udp.h" +#include <linux/igmp.h> + + +static struct inet_protocol tcp_protocol = { + tcp_rcv, /* TCP handler */ + NULL, /* No fragment handler (and won't be for a long time) */ + tcp_err, /* TCP error control */ + NULL, /* next */ + IPPROTO_TCP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "TCP" /* name */ +}; + + +static struct inet_protocol udp_protocol = { + udp_rcv, /* UDP handler */ + NULL, /* Will be UDP fraglist handler */ + udp_err, /* UDP error control */ + &tcp_protocol, /* next */ + IPPROTO_UDP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "UDP" /* name */ +}; + + +static struct inet_protocol icmp_protocol = { + icmp_rcv, /* ICMP handler */ + NULL, /* ICMP never fragments anyway */ + NULL, /* ICMP error control */ + &udp_protocol, /* next */ + 
IPPROTO_ICMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "ICMP" /* name */ +}; + +#ifndef CONFIG_IP_MULTICAST +struct inet_protocol *inet_protocol_base = &icmp_protocol; +#else +static struct inet_protocol igmp_protocol = { + igmp_rcv, /* IGMP handler */ + NULL, /* IGMP never fragments anyway */ + NULL, /* IGMP error control */ + &icmp_protocol, /* next */ + IPPROTO_IGMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IGMP" /* name */ +}; + +struct inet_protocol *inet_protocol_base = &igmp_protocol; +#endif + +struct inet_protocol *inet_protos[MAX_INET_PROTOS] = { + NULL +}; + + +struct inet_protocol * +inet_get_protocol(unsigned char prot) +{ + unsigned char hash; + struct inet_protocol *p; + + hash = prot & (MAX_INET_PROTOS - 1); + for (p = inet_protos[hash] ; p != NULL; p=p->next) { + if (p->protocol == prot) return((struct inet_protocol *) p); + } + return(NULL); +} + + +void +inet_add_protocol(struct inet_protocol *prot) +{ + unsigned char hash; + struct inet_protocol *p2; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + prot ->next = inet_protos[hash]; + inet_protos[hash] = prot; + prot->copy = 0; + + /* Set the copy bit if we need to. */ + p2 = (struct inet_protocol *) prot->next; + while(p2 != NULL) { + if (p2->protocol == prot->protocol) { + prot->copy = 1; + break; + } + p2 = (struct inet_protocol *) prot->next; + } +} + + +int +inet_del_protocol(struct inet_protocol *prot) +{ + struct inet_protocol *p; + struct inet_protocol *lp = NULL; + unsigned char hash; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + if (prot == inet_protos[hash]) { + inet_protos[hash] = (struct inet_protocol *) inet_protos[hash]->next; + return(0); + } + + p = (struct inet_protocol *) inet_protos[hash]; + while(p != NULL) { + /* + * We have to worry if the protocol being deleted is + * the last one on the list, then we may need to reset + * someone's copied bit. 
+ */ + if (p->next != NULL && p->next == prot) { + /* + * if we are the last one with this protocol and + * there is a previous one, reset its copy bit. + */ + if (p->copy == 0 && lp != NULL) lp->copy = 0; + p->next = prot->next; + return(0); + } + + if (p->next != NULL && p->next->protocol == prot->protocol) { + lp = p; + } + + p = (struct inet_protocol *) p->next; + } + return(-1); +} diff --git a/pfinet/linux-inet/protocol.h b/pfinet/linux-inet/protocol.h new file mode 100644 index 00000000..3e0b6fb3 --- /dev/null +++ b/pfinet/linux-inet/protocol.h @@ -0,0 +1,59 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the protocol dispatcher. + * + * Version: @(#)protocol.h 1.0.2 05/07/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * Alan Cox : Added a name field and a frag handler + * field for later. + */ + +#ifndef _PROTOCOL_H +#define _PROTOCOL_H + + +#define MAX_INET_PROTOS 32 /* Must be a power of 2 */ + + +/* This is used to register protocols. 
*/ +struct inet_protocol { + int (*handler)(struct sk_buff *skb, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, + int redo, struct inet_protocol *protocol); + int (*frag_handler)(struct sk_buff *skb, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, + int redo, struct inet_protocol *protocol); + void (*err_handler)(int err, unsigned char *buff, + unsigned long daddr, + unsigned long saddr, + struct inet_protocol *protocol); + struct inet_protocol *next; + unsigned char protocol; + unsigned char copy:1; + void *data; + char *name; +}; + + +extern struct inet_protocol *inet_protocol_base; +extern struct inet_protocol *inet_protos[MAX_INET_PROTOS]; + + +extern void inet_add_protocol(struct inet_protocol *prot); +extern int inet_del_protocol(struct inet_protocol *prot); + + +#endif /* _PROTOCOL_H */ diff --git a/pfinet/linux-inet/psnap.c b/pfinet/linux-inet/psnap.c new file mode 100644 index 00000000..287b3353 --- /dev/null +++ b/pfinet/linux-inet/psnap.c @@ -0,0 +1,123 @@ +/* + * SNAP data link layer. Derived from 802.2 + * + * Alan Cox <Alan.Cox@linux.org>, from the 802.2 layer by Greg Page. + * Merged in additions from Greg Page's psnap.c. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "datalink.h" +#include "p8022.h" +#include "psnap.h" +#include <linux/mm.h> +#include <linux/in.h> + +static struct datalink_proto *snap_list = NULL; +static struct datalink_proto *snap_dl = NULL; /* 802.2 DL for SNAP */ + +/* + * Find a snap client by matching the 5 bytes. 
+ */ + +static struct datalink_proto *find_snap_client(unsigned char *desc) +{ + struct datalink_proto *proto; + + for (proto = snap_list; proto != NULL && memcmp(proto->type, desc, 5) ; proto = proto->next); + return proto; +} + +/* + * A SNAP packet has arrived + */ + +int snap_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + static struct packet_type psnap_packet_type = + { + 0, + NULL, /* All Devices */ + snap_rcv, + NULL, + NULL, + }; + + struct datalink_proto *proto; + + proto = find_snap_client(skb->h.raw); + if (proto != NULL) + { + /* + * Pass the frame on. + */ + + skb->h.raw += 5; + skb->len -= 5; + if (psnap_packet_type.type == 0) + psnap_packet_type.type=htons(ETH_P_SNAP); + return proto->rcvfunc(skb, dev, &psnap_packet_type); + } + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * Put a SNAP header on a frame and pass to 802.2 + */ + +static void snap_datalink_header(struct datalink_proto *dl, struct sk_buff *skb, unsigned char *dest_node) +{ + struct device *dev = skb->dev; + unsigned char *rawp; + + rawp = skb->data + snap_dl->header_length+dev->hard_header_len; + memcpy(rawp,dl->type,5); + skb->h.raw = rawp+5; + snap_dl->datalink_header(snap_dl, skb, dest_node); +} + +/* + * Set up the SNAP layer + */ + +void snap_proto_init(struct net_proto *pro) +{ + snap_dl=register_8022_client(0xAA, snap_rcv); + if(snap_dl==NULL) + printk("SNAP - unable to register with 802.2\n"); +} + +/* + * Register SNAP clients. We don't yet use this for IP or IPX. 
+ */ + +struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)) +{ + struct datalink_proto *proto; + + if (snap_dl == NULL || find_snap_client(desc) != NULL) /* snap_dl is dereferenced below for header_length, so refuse while it is still NULL */ + return NULL; + + proto = (struct datalink_proto *) kmalloc(sizeof(*proto), GFP_ATOMIC); + if (proto != NULL) + { + memcpy(proto->type, desc,5); + proto->type_len = 5; + proto->rcvfunc = rcvfunc; + proto->header_length = 5+snap_dl->header_length; + proto->datalink_header = snap_datalink_header; + proto->string_name = "SNAP"; + proto->next = snap_list; + snap_list = proto; + } + + return proto; +} + diff --git a/pfinet/linux-inet/psnap.h b/pfinet/linux-inet/psnap.h new file mode 100644 index 00000000..b69859db --- /dev/null +++ b/pfinet/linux-inet/psnap.h @@ -0,0 +1,2 @@ +struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct device *, struct packet_type *)); + diff --git a/pfinet/linux-inet/psnapcall.h b/pfinet/linux-inet/psnapcall.h new file mode 100644 index 00000000..9da5763c --- /dev/null +++ b/pfinet/linux-inet/psnapcall.h @@ -0,0 +1,2 @@ +/* Separate to keep compilation of Space.c simpler */ +extern void snap_proto_init(struct net_proto *); diff --git a/pfinet/linux-inet/rarp.c b/pfinet/linux-inet/rarp.c new file mode 100644 index 00000000..72924bb2 --- /dev/null +++ b/pfinet/linux-inet/rarp.c @@ -0,0 +1,491 @@ +/* linux/net/inet/rarp.c + * + * Copyright (C) 1994 by Ross Martin + * Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche + * + * This module implements the Reverse Address Resolution Protocol + * (RARP, RFC 903), which is used to convert low level addresses such + * as ethernet addresses into high level addresses such as IP addresses. + * The most common use of RARP is as a means for a diskless workstation + * to discover its IP address during a network boot.
+ * + ** + *** WARNING:::::::::::::::::::::::::::::::::WARNING + **** + ***** SUN machines seem determined to boot solely from the person who + **** answered their RARP query. NEVER add a SUN to your RARP table + *** unless you have all the rest to boot the box from it. + ** + * + * Currently, only ethernet address -> IP address is likely to work. + * (Is RARP ever used for anything else?) + * + * This code is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/if_arp.h> +#include <linux/in.h> +#include <asm/system.h> +#include <asm/segment.h> +#include <stdarg.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include "ip.h" +#include "route.h" +#include "protocol.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "arp.h" +#include "rarp.h" +#ifdef CONFIG_AX25 +#include "ax25.h" +#endif + +#ifdef CONFIG_INET_RARP + +/* + * This structure defines the RARP mapping cache. As long as we make + * changes in this structure, we keep interrupts off. 
+ */ + +struct rarp_table +{ + struct rarp_table *next; /* Linked entry list */ + unsigned long ip; /* ip address of entry */ + unsigned char ha[MAX_ADDR_LEN]; /* Hardware address */ + unsigned char hlen; /* Length of hardware address */ + unsigned char htype; /* Type of hardware in use */ + struct device *dev; /* Device the entry is tied to */ +}; + +struct rarp_table *rarp_tables = NULL; + + +static struct packet_type rarp_packet_type = +{ + 0, /* Should be: __constant_htons(ETH_P_RARP) - but this _doesn't_ come out constant! */ + 0, /* copy */ + rarp_rcv, + NULL, + NULL +}; + +static initflag = 1; + +/* + * Called once when data first added to rarp cache with ioctl. + */ + +static void rarp_init (void) +{ + /* Register the packet type */ + rarp_packet_type.type=htons(ETH_P_RARP); + dev_add_pack(&rarp_packet_type); +} + +/* + * Release the memory for this entry. + */ + +static inline void rarp_release_entry(struct rarp_table *entry) +{ + kfree_s(entry, sizeof(struct rarp_table)); + return; +} + +/* + * Delete a RARP mapping entry in the cache. + */ + +static void rarp_destroy(unsigned long ip_addr) +{ + struct rarp_table *entry; + struct rarp_table **pentry; + + cli(); + pentry = &rarp_tables; + while ((entry = *pentry) != NULL) + { + if (entry->ip == ip_addr) + { + *pentry = entry->next; + sti(); + rarp_release_entry(entry); + return; + } + pentry = &entry->next; + } + sti(); +} + + +/* + * Receive an arp request by the device layer. Maybe it should be + * rewritten to use the incoming packet for the reply. The current + * "overhead" time isn't that high... + */ + +int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ +/* + * We shouldn't use this type conversion. Check later. 
+ */ + struct arphdr *rarp = (struct arphdr *)skb->h.raw; + unsigned char *rarp_ptr = (unsigned char *)(rarp+1); + struct rarp_table *entry; + long sip,tip; + unsigned char *sha,*tha; /* s for "source", t for "target" */ + +/* + * If this test doesn't pass, it's not IP, or we should ignore it anyway + */ + + if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd) + || dev->flags&IFF_NOARP) + { + kfree_skb(skb, FREE_READ); + return 0; + } + +/* + * If it's not a RARP request, delete it. + */ + if (rarp->ar_op != htons(ARPOP_RREQUEST)) + { + kfree_skb(skb, FREE_READ); + return 0; + } + +/* + * For now we will only deal with IP addresses. + */ + + if ( +#ifdef CONFIG_AX25 + (rarp->ar_pro != htons(AX25_P_IP) && dev->type == ARPHRD_AX25) || +#endif + (rarp->ar_pro != htons(ETH_P_IP) && dev->type != ARPHRD_AX25) + || rarp->ar_pln != 4) + { + /* + * This packet is not for us. Remove it. + */ + kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * Extract variable width fields + */ + + sha=rarp_ptr; + rarp_ptr+=dev->addr_len; + memcpy(&sip,rarp_ptr,4); + rarp_ptr+=4; + tha=rarp_ptr; + rarp_ptr+=dev->addr_len; + memcpy(&tip,rarp_ptr,4); + +/* + * Process entry. Use tha for table lookup according to RFC903. + */ + + cli(); + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (!memcmp(entry->ha, tha, rarp->ar_hln)) + break; + + if (entry != NULL) + { + sip=entry->ip; + sti(); + + arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, dev->pa_addr, sha, + dev->dev_addr); + } + else + sti(); + + kfree_skb(skb, FREE_READ); + return 0; +} + + +/* + * Set (create) a RARP cache entry. + */ + +static int rarp_req_set(struct arpreq *req) +{ + struct arpreq r; + struct rarp_table *entry; + struct sockaddr_in *si; + int htype, hlen; + unsigned long ip; + struct rtable *rt; + + memcpy_fromfs(&r, req, sizeof(r)); + + /* + * We only understand about IP addresses... 
+ */ + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + + switch (r.arp_ha.sa_family) + { + case ARPHRD_ETHER: + htype = ARPHRD_ETHER; + hlen = ETH_ALEN; + break; +#ifdef CONFIG_AX25 + case ARPHRD_AX25: + htype = ARPHRD_AX25; + hlen = 7; + break; +#endif + default: + return -EPFNOSUPPORT; + } + + si = (struct sockaddr_in *) &r.arp_pa; + ip = si->sin_addr.s_addr; + if (ip == 0) + { + printk("RARP: SETRARP: requested PA is 0.0.0.0 !\n"); + return -EINVAL; + } + +/* + * Is it reachable directly ? + */ + + rt = ip_rt_route(ip, NULL, NULL); + if (rt == NULL) + return -ENETUNREACH; + +/* + * Is there an existing entry for this address? Find out... + */ + + cli(); + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (entry->ip == ip) + break; + +/* + * If no entry was found, create a new one. + */ + + if (entry == NULL) + { + entry = (struct rarp_table *) kmalloc(sizeof(struct rarp_table), + GFP_ATOMIC); + if (entry == NULL) + { + sti(); + return -ENOMEM; + } + if(initflag) + { + rarp_init(); + initflag=0; + } + + entry->next = rarp_tables; + rarp_tables = entry; + } + + entry->ip = ip; + entry->hlen = hlen; + entry->htype = htype; + memcpy(&entry->ha, &r.arp_ha.sa_data, hlen); + entry->dev = rt->rt_dev; + + sti(); + + return 0; +} + + +/* + * Get a RARP cache entry. + */ + +static int rarp_req_get(struct arpreq *req) +{ + struct arpreq r; + struct rarp_table *entry; + struct sockaddr_in *si; + unsigned long ip; + +/* + * We only understand about IP addresses... + */ + + memcpy_fromfs(&r, req, sizeof(r)); + + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + +/* + * Is there an existing entry for this address? + */ + + si = (struct sockaddr_in *) &r.arp_pa; + ip = si->sin_addr.s_addr; + + cli(); + for (entry = rarp_tables; entry != NULL; entry = entry->next) + if (entry->ip == ip) + break; + + if (entry == NULL) + { + sti(); + return -ENXIO; + } + +/* + * We found it; copy into structure. 
+ */ + + memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen); + r.arp_ha.sa_family = entry->htype; + sti(); + +/* + * Copy the information back + */ + + memcpy_tofs(req, &r, sizeof(r)); + return 0; +} + + +/* + * Handle a RARP layer I/O control request. + */ + +int rarp_ioctl(unsigned int cmd, void *arg) +{ + struct arpreq r; + struct sockaddr_in *si; + int err; + + switch(cmd) + { + case SIOCDRARP: + if (!suser()) + return -EPERM; + err = verify_area(VERIFY_READ, arg, sizeof(struct arpreq)); + if(err) + return err; + memcpy_fromfs(&r, arg, sizeof(r)); + if (r.arp_pa.sa_family != AF_INET) + return -EPFNOSUPPORT; + si = (struct sockaddr_in *) &r.arp_pa; + rarp_destroy(si->sin_addr.s_addr); + return 0; + + case SIOCGRARP: + err = verify_area(VERIFY_WRITE, arg, sizeof(struct arpreq)); + if(err) + return err; + return rarp_req_get((struct arpreq *)arg); + case SIOCSRARP: + if (!suser()) + return -EPERM; + err = verify_area(VERIFY_READ, arg, sizeof(struct arpreq)); + if(err) + return err; + return rarp_req_set((struct arpreq *)arg); + default: + return -EINVAL; + } + + /*NOTREACHED*/ + return 0; +} + +int rarp_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len=0; + off_t begin=0; + off_t pos=0; + int size; + struct rarp_table *entry; + char ipbuffer[20]; + unsigned long netip; + if(initflag) + { + size = sprintf(buffer,"RARP disabled until entries added to cache.\n"); + pos+=size; + len+=size; + } + else + { + size = sprintf(buffer, + "IP address HW type HW address\n"); + pos+=size; + len+=size; + + cli(); + for(entry=rarp_tables; entry!=NULL; entry=entry->next) + { + netip=htonl(entry->ip); /* switch to network order */ + sprintf(ipbuffer,"%d.%d.%d.%d", + (unsigned int)(netip>>24)&255, + (unsigned int)(netip>>16)&255, + (unsigned int)(netip>>8)&255, + (unsigned int)(netip)&255); + + size = sprintf(buffer+len, + "%-17s%-20s%02x:%02x:%02x:%02x:%02x:%02x\n", + ipbuffer, + "10Mbps Ethernet", + (unsigned int)entry->ha[0], + (unsigned int)entry->ha[1], 
+ (unsigned int)entry->ha[2], + (unsigned int)entry->ha[3], + (unsigned int)entry->ha[4], + (unsigned int)entry->ha[5]); + + len+=size; + pos=begin+len; + + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + sti(); + } + + *start=buffer+(offset-begin); /* Start of wanted data */ + len-=(offset-begin); /* Start slop */ + if(len>length) + len=length; /* Ending slop */ + return len; +} + +#endif diff --git a/pfinet/linux-inet/rarp.h b/pfinet/linux-inet/rarp.h new file mode 100644 index 00000000..02ee7784 --- /dev/null +++ b/pfinet/linux-inet/rarp.h @@ -0,0 +1,14 @@ +/* linux/net/inet/rarp.h */ +#ifndef _RARP_H +#define _RARP_H + +extern int rarp_ioctl(unsigned int cmd, void *arg); +extern int rarp_rcv(struct sk_buff *skb, + struct device *dev, + struct packet_type *pt); +extern int rarp_get_info(char *buffer, + char **start, + off_t offset, + int length); +#endif /* _RARP_H */ + diff --git a/pfinet/linux-inet/raw.c b/pfinet/linux-inet/raw.c new file mode 100644 index 00000000..b7d34a37 --- /dev/null +++ b/pfinet/linux-inet/raw.c @@ -0,0 +1,319 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * RAW - implementation of IP "raw" sockets. + * + * Version: @(#)raw.c 1.0.4 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : verify_area() fixed up + * Alan Cox : ICMP error handling + * Alan Cox : EMSGSIZE if you send too big a packet + * Alan Cox : Now uses generic datagrams and shared skbuff + * library. No more peek crashes, no more backlogs + * Alan Cox : Checks sk->broadcast. 
+ * Alan Cox : Uses skb_free_datagram/skb_copy_datagram + * Alan Cox : Raw passes ip options too + * Alan Cox : Setsocketopt added + * Alan Cox : Fixed error return for broadcasts + * Alan Cox : Removed wake_up calls + * Alan Cox : Use ttl/tos + * Alan Cox : Cleaned up old debugging + * Alan Cox : Use new kernel side addresses + * Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets. + * Alan Cox : BSD style RAW socket demultiplexing. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/system.h> +#include <asm/segment.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "icmp.h" +#include "udp.h" + + +static inline unsigned long min(unsigned long a, unsigned long b) +{ + if (a < b) + return(a); + return(b); +} + + +/* raw_err gets called by the icmp module. */ +void raw_err (int err, unsigned char *header, unsigned long daddr, + unsigned long saddr, struct inet_protocol *protocol) +{ + struct sock *sk; + + if (protocol == NULL) + return; + sk = (struct sock *) protocol->data; + if (sk == NULL) + return; + + /* This is meaningless in raw sockets. */ + if (err & 0xff00 == (ICMP_SOURCE_QUENCH << 8)) + { + if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; + return; + } + + sk->err = icmp_err_convert[err & 0xff].error; + sk->error_report(sk); + + return; +} + + +/* + * This should be the easiest of all, all we do is + * copy it into a buffer. 
All demultiplexing is done + * in ip.c + */ + +int raw_rcv(struct sock *sk, struct sk_buff *skb, struct device *dev, long saddr, long daddr) +{ + /* Now we need to copy this into memory. */ + skb->sk = sk; + skb->len = ntohs(skb->ip_hdr->tot_len); + skb->h.raw = (unsigned char *) skb->ip_hdr; + skb->dev = dev; + skb->saddr = daddr; + skb->daddr = saddr; + + /* Charge it to the socket. */ + + if(sock_queue_rcv_skb(sk,skb)<0) + { + ip_statistics.IpInDiscards++; + skb->sk=NULL; + kfree_skb(skb, FREE_READ); + return(0); + } + + ip_statistics.IpInDelivers++; + release_sock(sk); + return(0); +} + +/* + * Send a RAW IP packet. + */ + +static int raw_sendto(struct sock *sk, unsigned char *from, + int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len) +{ + struct sk_buff *skb; + struct device *dev=NULL; + struct sockaddr_in sin; + int tmp; + int err; + + /* + * Check the flags. Only MSG_DONTROUTE is permitted. + */ + + if (flags & MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + if (flags & ~MSG_DONTROUTE) + return(-EINVAL); + /* + * Get and verify the address. 
+ */ + + if (usin) + { + if (addr_len < sizeof(sin)) + return(-EINVAL); + memcpy(&sin, usin, sizeof(sin)); + if (sin.sin_family && sin.sin_family != AF_INET) + return(-EINVAL); + } + else + { + if (sk->state != TCP_ESTABLISHED) + return(-EINVAL); + sin.sin_family = AF_INET; + sin.sin_port = sk->protocol; + sin.sin_addr.s_addr = sk->daddr; + } + if (sin.sin_port == 0) + sin.sin_port = sk->protocol; + + if (sin.sin_addr.s_addr == INADDR_ANY) + sin.sin_addr.s_addr = ip_my_addr(); + + if (sk->broadcast == 0 && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST) + return -EACCES; + + skb=sock_alloc_send_skb(sk, len+sk->prot->max_header, noblock, &err); + if(skb==NULL) + return err; + + skb->sk = sk; + skb->free = 1; + skb->localroute = sk->localroute | (flags&MSG_DONTROUTE); + + tmp = sk->prot->build_header(skb, sk->saddr, + sin.sin_addr.s_addr, &dev, + sk->protocol, sk->opt, skb->mem_len, sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + kfree_skb(skb,FREE_WRITE); + release_sock(sk); + return(tmp); + } + + memcpy_fromfs(skb->data + tmp, from, len); + + /* + * If we are using IPPROTO_RAW, we need to fill in the source address in + * the IP header + */ + + if(sk->protocol==IPPROTO_RAW) + { + unsigned char *buff; + struct iphdr *iph; + + buff = skb->data; + buff += tmp; + + iph = (struct iphdr *)buff; + iph->saddr = sk->saddr; + } + + skb->len = tmp + len; + + sk->prot->queue_xmit(sk, dev, skb, 1); + release_sock(sk); + return(len); +} + + +static int raw_write(struct sock *sk, unsigned char *buff, int len, int noblock, + unsigned flags) +{ + return(raw_sendto(sk, buff, len, noblock, flags, NULL, 0)); +} + + +static void raw_close(struct sock *sk, int timeout) +{ + sk->state = TCP_CLOSE; +} + + +static int raw_init(struct sock *sk) +{ + return(0); +} + + +/* + * This should be easy, if there is something there + * we return it, otherwise we block. 
+ */ + +int raw_recvfrom(struct sock *sk, unsigned char *to, int len, + int noblock, unsigned flags, struct sockaddr_in *sin, + int *addr_len) +{ + int copied=0; + struct sk_buff *skb; + int err; + int truesize; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + if (sk->shutdown & RCV_SHUTDOWN) + return(0); + + if (addr_len) + *addr_len=sizeof(*sin); + + skb=skb_recv_datagram(sk,flags,noblock,&err); + if(skb==NULL) + return err; + + truesize=skb->len; + copied = min(len, truesize); + + skb_copy_datagram(skb, 0, to, copied); + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = skb->daddr; + } + skb_free_datagram(skb); + release_sock(sk); + return (truesize); /* len not copied. BSD returns the true size of the message so you know a bit fell off! */ +} + + +int raw_read (struct sock *sk, unsigned char *buff, int len, int noblock,unsigned flags) +{ + return(raw_recvfrom(sk, buff, len, noblock, flags, NULL, NULL)); +} + + +struct proto raw_prot = { + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + raw_close, + raw_read, + raw_write, + raw_sendto, + raw_recvfrom, + ip_build_header, + udp_connect, + NULL, + ip_queue_xmit, + NULL, + NULL, + NULL, + NULL, + datagram_select, + NULL, + raw_init, + NULL, + ip_setsockopt, + ip_getsockopt, + 128, + 0, + {NULL,}, + "RAW", + 0, 0 +}; diff --git a/pfinet/linux-inet/raw.h b/pfinet/linux-inet/raw.h new file mode 100644 index 00000000..8f1cf0c2 --- /dev/null +++ b/pfinet/linux-inet/raw.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the RAW-IP module. + * + * Version: @(#)raw.h 1.0.2 05/07/93 + * + * Author: Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _RAW_H +#define _RAW_H + + +extern struct proto raw_prot; + + +extern void raw_err(int err, unsigned char *header, unsigned long daddr, + unsigned long saddr, struct inet_protocol *protocol); +extern int raw_recvfrom(struct sock *sk, unsigned char *to, + int len, int noblock, unsigned flags, + struct sockaddr_in *sin, int *addr_len); +extern int raw_read(struct sock *sk, unsigned char *buff, + int len, int noblock, unsigned flags); +extern int raw_rcv(struct sock *, struct sk_buff *, struct device *, + long, long); + +#endif /* _RAW_H */ diff --git a/pfinet/linux-inet/route.c b/pfinet/linux-inet/route.c new file mode 100644 index 00000000..ce06dcfe --- /dev/null +++ b/pfinet/linux-inet/route.c @@ -0,0 +1,684 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * ROUTE - implementation of the IP router. + * + * Version: @(#)route.c 1.0.14 05/31/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Linus Torvalds, <Linus.Torvalds@helsinki.fi> + * + * Fixes: + * Alan Cox : Verify area fixes. + * Alan Cox : cli() protects routing changes + * Rui Oliveira : ICMP routing table updates + * (rco@di.uminho.pt) Routing table insertion and update + * Linus Torvalds : Rewrote bits to be sensible + * Alan Cox : Added BSD route gw semantics + * Alan Cox : Super /proc >4K + * Alan Cox : MTU in route table + * Alan Cox : MSS actually. Also added the window + * clamper. 
+ * Sam Lantinga : Fixed route matching in rt_del() + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include "route.h" +#include "tcp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "icmp.h" + +/* + * The routing table list + */ + +static struct rtable *rt_base = NULL; + +/* + * Pointer to the loopback route + */ + +static struct rtable *rt_loopback = NULL; + +/* + * Remove a routing table entry. + */ + +static void rt_del(unsigned long dst, char *devname) +{ + struct rtable *r, **rp; + unsigned long flags; + + rp = &rt_base; + + /* + * This must be done with interrupts off because we could take + * an ICMP_REDIRECT. + */ + + save_flags(flags); + cli(); + while((r = *rp) != NULL) + { + /* Make sure both the destination and the device match */ + if ( r->rt_dst != dst || + (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) ) + { + rp = &r->rt_next; + continue; + } + *rp = r->rt_next; + + /* + * If we delete the loopback route update its pointer. + */ + + if (rt_loopback == r) + rt_loopback = NULL; + kfree_s(r, sizeof(struct rtable)); + } + restore_flags(flags); +} + + +/* + * Remove all routing table entries for a device. This is called when + * a device is downed. 
+ */ + +void ip_rt_flush(struct device *dev) +{ + struct rtable *r; + struct rtable **rp; + unsigned long flags; + + rp = &rt_base; + save_flags(flags); + cli(); + while ((r = *rp) != NULL) { + if (r->rt_dev != dev) { + rp = &r->rt_next; + continue; + } + *rp = r->rt_next; + if (rt_loopback == r) + rt_loopback = NULL; + kfree_s(r, sizeof(struct rtable)); + } + restore_flags(flags); +} + +/* + * Used by 'rt_add()' when we can't get the netmask any other way.. + * + * If the lower byte or two are zero, we guess the mask based on the + * number of zero 8-bit net numbers, otherwise we use the "default" + * masks judging by the destination address and our device netmask. + */ + +static inline unsigned long default_mask(unsigned long dst) +{ + dst = ntohl(dst); + if (IN_CLASSA(dst)) + return htonl(IN_CLASSA_NET); + if (IN_CLASSB(dst)) + return htonl(IN_CLASSB_NET); + return htonl(IN_CLASSC_NET); +} + + +/* + * If no mask is specified then generate a default entry. + */ + +static unsigned long guess_mask(unsigned long dst, struct device * dev) +{ + unsigned long mask; + + if (!dst) + return 0; + mask = default_mask(dst); + if ((dst ^ dev->pa_addr) & mask) + return mask; + return dev->pa_mask; +} + + +/* + * Find the route entry through which our gateway will be reached + */ + +static inline struct device * get_gw_dev(unsigned long gw) +{ + struct rtable * rt; + + for (rt = rt_base ; ; rt = rt->rt_next) + { + if (!rt) + return NULL; + if ((gw ^ rt->rt_dst) & rt->rt_mask) + continue; + /* + * Gateways behind gateways are a no-no + */ + + if (rt->rt_flags & RTF_GATEWAY) + return NULL; + return rt->rt_dev; + } +} + +/* + * Rewrote rt_add(), as the old one was weird - Linus + * + * This routine is used to update the IP routing table, either + * from the kernel (ICMP_REDIRECT) or via an ioctl call issued + * by the superuser. 
+ */ + +void ip_rt_add(short flags, unsigned long dst, unsigned long mask, + unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window) +{ + struct rtable *r, *rt; + struct rtable **rp; + unsigned long cpuflags; + + /* + * A host is a unique machine and has no network bits. + */ + + if (flags & RTF_HOST) + { + mask = 0xffffffff; + } + + /* + * Calculate the network mask + */ + + else if (!mask) + { + if (!((dst ^ dev->pa_addr) & dev->pa_mask)) + { + mask = dev->pa_mask; + flags &= ~RTF_GATEWAY; + if (flags & RTF_DYNAMIC) + { + /*printk("Dynamic route to my own net rejected\n");*/ + return; + } + } + else + mask = guess_mask(dst, dev); + dst &= mask; + } + + /* + * A gateway must be reachable and not a local address + */ + + if (gw == dev->pa_addr) + flags &= ~RTF_GATEWAY; + + if (flags & RTF_GATEWAY) + { + /* + * Don't try to add a gateway we can't reach.. + */ + + if (dev != get_gw_dev(gw)) + return; + + flags |= RTF_GATEWAY; + } + else + gw = 0; + + /* + * Allocate an entry and fill it in. + */ + + rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC); + if (rt == NULL) + { + return; + } + memset(rt, 0, sizeof(struct rtable)); + rt->rt_flags = flags | RTF_UP; + rt->rt_dst = dst; + rt->rt_dev = dev; + rt->rt_gateway = gw; + rt->rt_mask = mask; + rt->rt_mss = dev->mtu - HEADER_SIZE; + rt->rt_window = 0; /* Default is no clamping */ + + /* Are the MSS/Window valid ? */ + + if(rt->rt_flags & RTF_MSS) + rt->rt_mss = mtu; + + if(rt->rt_flags & RTF_WINDOW) + rt->rt_window = window; + + /* + * What we have to do is loop though this until we have + * found the first address which has a higher generality than + * the one in rt. Then we can put rt in right before it. + * The interrupts must be off for this process. + */ + + save_flags(cpuflags); + cli(); + + /* + * Remove old route if we are getting a duplicate. 
+ */ + + rp = &rt_base; + while ((r = *rp) != NULL) + { + if (r->rt_dst != dst || + r->rt_mask != mask) + { + rp = &r->rt_next; + continue; + } + *rp = r->rt_next; + if (rt_loopback == r) + rt_loopback = NULL; + kfree_s(r, sizeof(struct rtable)); + } + + /* + * Add the new route + */ + + rp = &rt_base; + while ((r = *rp) != NULL) { + if ((r->rt_mask & mask) != mask) + break; + rp = &r->rt_next; + } + rt->rt_next = r; + *rp = rt; + + /* + * Update the loopback route + */ + + if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback) + rt_loopback = rt; + + /* + * Restore the interrupts and return + */ + + restore_flags(cpuflags); + return; +} + +/* + * Remove a routing table entry (exported version). + */ +void ip_rt_del (unsigned long dst, struct device *dev) +{ + /* Should probably just copy contents of rt_del and replace name + comparison with device comparsion. */ + rt_del (dst, dev->name); +} + + +/* + * Check if a mask is acceptable. + */ + +static inline int bad_mask(unsigned long mask, unsigned long addr) +{ + if (addr & (mask = ~mask)) + return 1; + mask = ntohl(mask); + if (mask & (mask+1)) + return 1; + return 0; +} + +/* + * Process a route add request from the user + */ + +static int rt_new(struct rtentry *r) +{ + int err; + char * devname; + struct device * dev = NULL; + unsigned long flags, daddr, mask, gw; + + /* + * If a device is specified find it. 
+ */ + + if ((devname = r->rt_dev) != NULL) + { + err = getname(devname, &devname); + if (err) + return err; + dev = dev_get(devname); + putname(devname); + if (!dev) + return -EINVAL; + } + + /* + * If the device isn't INET, don't allow it + */ + + if (r->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Make local copies of the important bits + */ + + flags = r->rt_flags; + daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr; + mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr; + gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr; + + + /* + * BSD emulation: Permits route add someroute gw one-of-my-addresses + * to indicate which iface. Not as clean as the nice Linux dev technique + * but people keep using it... + */ + + if (!dev && (flags & RTF_GATEWAY)) + { + struct device *dev2; + for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) + { + if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) + { + flags &= ~RTF_GATEWAY; + dev = dev2; + break; + } + } + } + + /* + * Ignore faulty masks + */ + + if (bad_mask(mask, daddr)) + mask = 0; + + /* + * Set the mask to nothing for host routes. + */ + + if (flags & RTF_HOST) + mask = 0xffffffff; + else if (mask && r->rt_genmask.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * You can only gateway IP via IP.. + */ + + if (flags & RTF_GATEWAY) + { + if (r->rt_gateway.sa_family != AF_INET) + return -EAFNOSUPPORT; + if (!dev) + dev = get_gw_dev(gw); + } + else if (!dev) + dev = ip_dev_check(daddr); + + /* + * Unknown device. + */ + + if (dev == NULL) + return -ENETUNREACH; + + /* + * Add the route + */ + + ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window); + return 0; +} + + +/* + * Remove a route, as requested by the user. 
+ */ + +static int rt_kill(struct rtentry *r) +{ + struct sockaddr_in *trg; + char *devname; + int err; + + trg = (struct sockaddr_in *) &r->rt_dst; + if ((devname = r->rt_dev) != NULL) + { + err = getname(devname, &devname); + if (err) + return err; + } + rt_del(trg->sin_addr.s_addr, devname); + if ( devname != NULL ) + putname(devname); + return 0; +} + + +/* + * Called from the PROCfs module. This outputs /proc/net/route. + */ + +int rt_get_info(char *buffer, char **start, off_t offset, int length) +{ + struct rtable *r; + int len=0; + off_t pos=0; + off_t begin=0; + int size; + + len += sprintf(buffer, + "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\n"); + pos=len; + + /* + * This isn't quite right -- r->rt_dst is a struct! + */ + + for (r = rt_base; r != NULL; r = r->rt_next) + { + size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\n", + r->rt_dev->name, r->rt_dst, r->rt_gateway, + r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric, + r->rt_mask, (int)r->rt_mss, r->rt_window); + len+=size; + pos+=size; + if(pos<offset) + { + len=0; + begin=pos; + } + if(pos>offset+length) + break; + } + + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* + * This is hackish, but results in better code. Use "-S" to see why. + */ + +#define early_out ({ goto no_route; 1; }) + +/* + * Route a packet. This needs to be fairly quick. Florian & Co. + * suggested a unified ARP and IP routing cache. Done right its + * probably a brilliant idea. I'd actually suggest a unified + * ARP/IP routing/Socket pointer cache. Volunteers welcome + */ + +struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr) +{ + struct rtable *rt; + + for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) + { + if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) + break; + /* + * broadcast addresses can be special cases.. 
+ */ + if (rt->rt_flags & RTF_GATEWAY) + continue; + if ((rt->rt_dev->flags & IFF_BROADCAST) && + (rt->rt_dev->pa_brdaddr == daddr)) + break; + } + + if(src_addr!=NULL) + *src_addr= rt->rt_dev->pa_addr; + + if (daddr == rt->rt_dev->pa_addr) { + if ((rt = rt_loopback) == NULL) + goto no_route; + } + rt->rt_use++; + return rt; +no_route: + return NULL; +} + +struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr) +{ + struct rtable *rt; + + for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) + { + /* + * No routed addressing. + */ + if (rt->rt_flags&RTF_GATEWAY) + continue; + + if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) + break; + /* + * broadcast addresses can be special cases.. + */ + + if ((rt->rt_dev->flags & IFF_BROADCAST) && + rt->rt_dev->pa_brdaddr == daddr) + break; + } + + if(src_addr!=NULL) + *src_addr= rt->rt_dev->pa_addr; + + if (daddr == rt->rt_dev->pa_addr) { + if ((rt = rt_loopback) == NULL) + goto no_route; + } + rt->rt_use++; + return rt; +no_route: + return NULL; +} + +/* + * Backwards compatibility + */ + +static int ip_get_old_rtent(struct old_rtentry * src, struct rtentry * rt) +{ + int err; + struct old_rtentry tmp; + + err=verify_area(VERIFY_READ, src, sizeof(*src)); + if (err) + return err; + memcpy_fromfs(&tmp, src, sizeof(*src)); + memset(rt, 0, sizeof(*rt)); + rt->rt_dst = tmp.rt_dst; + rt->rt_gateway = tmp.rt_gateway; + rt->rt_genmask.sa_family = AF_INET; + ((struct sockaddr_in *) &rt->rt_genmask)->sin_addr.s_addr = tmp.rt_genmask; + rt->rt_flags = tmp.rt_flags; + rt->rt_dev = tmp.rt_dev; + printk("Warning: obsolete routing request made.\n"); + return 0; +} + +#ifndef _HURD_ +/* + * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables + */ + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct rtentry rt; + + switch(cmd) + { + case SIOCADDRTOLD: /* Old style add route */ + case SIOCDELRTOLD: /* Old style delete route */ + if (!suser()) + return -EPERM; + err = ip_get_old_rtent((struct old_rtentry *) arg, &rt); + if (err) + return err; + return (cmd == SIOCDELRTOLD) ? rt_kill(&rt) : rt_new(&rt); + + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!suser()) + return -EPERM; + err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry)); + if (err) + return err; + memcpy_fromfs(&rt, arg, sizeof(struct rtentry)); + return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt); + } + + return -EINVAL; +} +#endif diff --git a/pfinet/linux-inet/route.h b/pfinet/linux-inet/route.h new file mode 100644 index 00000000..e43efa4e --- /dev/null +++ b/pfinet/linux-inet/route.h @@ -0,0 +1,54 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the IP router. + * + * Version: @(#)route.h 1.0.4 05/27/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Fixes: + * Alan Cox : Reformatted. Added ip_rt_local() + * Alan Cox : Support for TCP parameters. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _ROUTE_H +#define _ROUTE_H + + +#include <linux/route.h> + + +/* This is an entry in the IP routing table. 
*/ +struct rtable +{ + struct rtable *rt_next; + unsigned long rt_dst; + unsigned long rt_mask; + unsigned long rt_gateway; + unsigned char rt_flags; + unsigned char rt_metric; + short rt_refcnt; + unsigned long rt_use; + unsigned short rt_mss; + unsigned long rt_window; + struct device *rt_dev; +}; + + +extern void ip_rt_flush(struct device *dev); +extern void ip_rt_add(short flags, unsigned long addr, unsigned long mask, + unsigned long gw, struct device *dev, unsigned short mss, unsigned long window); +extern void ip_rt_del(unsigned long dst, struct device *dev); +extern struct rtable *ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr); +extern struct rtable *ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr); +extern int rt_get_info(char * buffer, char **start, off_t offset, int length); +extern int ip_rt_ioctl(unsigned int cmd, void *arg); + +#endif /* _ROUTE_H */ diff --git a/pfinet/linux-inet/skbuff.c b/pfinet/linux-inet/skbuff.c new file mode 100644 index 00000000..e4e1d247 --- /dev/null +++ b/pfinet/linux-inet/skbuff.c @@ -0,0 +1,573 @@ +/* + * Routines having to do with the 'struct sk_buff' memory handlers. + * + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Florian La Roche <rzsfl@rz.uni-sb.de> + * + * Fixes: + * Alan Cox : Fixed the worst of the load balancer bugs. + * Dave Platt : Interrupt stacking fix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Note: There are a load of cli()/sti() pairs protecting the net_memory type + * variables. Without them for some reason the ++/-- operators do not come out + * atomic. Also with gcc 2.4.5 these counts can come out wrong anyway - use 2.5.8!! 
+ */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include <linux/string.h> +#include "route.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" + + +/* + * Resource tracking variables + */ + +volatile unsigned long net_memory = 0; +volatile unsigned long net_skbcount = 0; +volatile unsigned long net_locked = 0; +volatile unsigned long net_allocs = 0; +volatile unsigned long net_fails = 0; +volatile unsigned long net_free_locked = 0; + +void show_net_buffers(void) +{ + printk("Networking buffers in use : %lu\n",net_skbcount); + printk("Memory committed to network buffers: %lu\n",net_memory); + printk("Network buffers locked by drivers : %lu\n",net_locked); + printk("Total network buffer allocations : %lu\n",net_allocs); + printk("Total failed network buffer allocs : %lu\n",net_fails); + printk("Total free while locked events : %lu\n",net_free_locked); +} + +#if CONFIG_SKB_CHECK + +/* + * Debugging paranoia. 
Can go later when this crud stack works + */ + +int skb_check(struct sk_buff *skb, int head, int line, char *file) +{ + if (head) { + if (skb->magic_debug_cookie != SK_HEAD_SKB) { + printk("File: %s Line %d, found a bad skb-head\n", + file,line); + return -1; + } + if (!skb->next || !skb->prev) { + printk("skb_check: head without next or prev\n"); + return -1; + } + if (skb->next->magic_debug_cookie != SK_HEAD_SKB + && skb->next->magic_debug_cookie != SK_GOOD_SKB) { + printk("File: %s Line %d, bad next head-skb member\n", + file,line); + return -1; + } + if (skb->prev->magic_debug_cookie != SK_HEAD_SKB + && skb->prev->magic_debug_cookie != SK_GOOD_SKB) { + printk("File: %s Line %d, bad prev head-skb member\n", + file,line); + return -1; + } +#if 0 + { + struct sk_buff *skb2 = skb->next; + int i = 0; + while (skb2 != skb && i < 5) { + if (skb_check(skb2, 0, line, file) < 0) { + printk("bad queue element in whole queue\n"); + return -1; + } + i++; + skb2 = skb2->next; + } + } +#endif + return 0; + } + if (skb->next != NULL && skb->next->magic_debug_cookie != SK_HEAD_SKB + && skb->next->magic_debug_cookie != SK_GOOD_SKB) { + printk("File: %s Line %d, bad next skb member\n", + file,line); + return -1; + } + if (skb->prev != NULL && skb->prev->magic_debug_cookie != SK_HEAD_SKB + && skb->prev->magic_debug_cookie != SK_GOOD_SKB) { + printk("File: %s Line %d, bad prev skb member\n", + file,line); + return -1; + } + + + if(skb->magic_debug_cookie==SK_FREED_SKB) + { + printk("File: %s Line %d, found a freed skb lurking in the undergrowth!\n", + file,line); + printk("skb=%p, real size=%ld, claimed size=%ld, free=%d\n", + skb,skb->truesize,skb->mem_len,skb->free); + return -1; + } + if(skb->magic_debug_cookie!=SK_GOOD_SKB) + { + printk("File: %s Line %d, passed a non skb!\n", file,line); + printk("skb=%p, real size=%ld, claimed size=%ld, free=%d\n", + skb,skb->truesize,skb->mem_len,skb->free); + return -1; + } + if(skb->mem_len!=skb->truesize) + { + printk("File: %s Line %d, 
Dubious size setting!\n",file,line); + printk("skb=%p, real size=%ld, claimed size=%ld\n", + skb,skb->truesize,skb->mem_len); + return -1; + } + /* Guess it might be acceptable then */ + return 0; +} +#endif + + +#ifdef CONFIG_SKB_CHECK +void skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->magic_debug_cookie = SK_HEAD_SKB; +} + + +/* + * Insert an sk_buff at the start of a list. + */ +void skb_queue_head(struct sk_buff_head *list_,struct sk_buff *newsk) +{ + unsigned long flags; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + + IS_SKB(newsk); + IS_SKB_HEAD(list); + if (newsk->next || newsk->prev) + printk("Suspicious queue head: sk_buff on list!\n"); + + newsk->next = list->next; + newsk->prev = list; + + newsk->next->prev = newsk; + newsk->prev->next = newsk; + + restore_flags(flags); +} + +/* + * Insert an sk_buff at the end of a list. + */ +void skb_queue_tail(struct sk_buff_head *list_, struct sk_buff *newsk) +{ + unsigned long flags; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + + if (newsk->next || newsk->prev) + printk("Suspicious queue tail: sk_buff on list!\n"); + IS_SKB(newsk); + IS_SKB_HEAD(list); + + newsk->next = list; + newsk->prev = list->prev; + + newsk->next->prev = newsk; + newsk->prev->next = newsk; + + restore_flags(flags); +} + +/* + * Remove an sk_buff from a list. This routine is also interrupt safe + * so you can grab read and free buffers as another process adds them. 
+ */ + +struct sk_buff *skb_dequeue(struct sk_buff_head *list_) +{ + long flags; + struct sk_buff *result; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + + IS_SKB_HEAD(list); + + result = list->next; + if (result == list) { + restore_flags(flags); + return NULL; + } + + result->next->prev = list; + list->next = result->next; + + result->next = NULL; + result->prev = NULL; + + restore_flags(flags); + + IS_SKB(result); + return result; +} + +/* + * Insert a packet before another one in a list. + */ +void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + IS_SKB(old); + IS_SKB(newsk); + + if(!old->next || !old->prev) + printk("insert before unlisted item!\n"); + if(newsk->next || newsk->prev) + printk("inserted item is already on a list.\n"); + + save_flags(flags); + cli(); + newsk->next = old; + newsk->prev = old->prev; + old->prev = newsk; + newsk->prev->next = newsk; + + restore_flags(flags); +} + +/* + * Place a packet after a given packet in a list. + */ +void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + IS_SKB(old); + IS_SKB(newsk); + + if(!old->next || !old->prev) + printk("append before unlisted item!\n"); + if(newsk->next || newsk->prev) + printk("append item is already on a list.\n"); + + save_flags(flags); + cli(); + + newsk->prev = old; + newsk->next = old->next; + newsk->next->prev = newsk; + old->next = newsk; + + restore_flags(flags); +} + +/* + * Remove an sk_buff from its list. Works even without knowing the list it + * is sitting on, which can be handy at times. It also means that THE LIST + * MUST EXIST when you unlink. Thus a list must have its contents unlinked + * _FIRST_. 
+ */ +void skb_unlink(struct sk_buff *skb) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + IS_SKB(skb); + + if(skb->prev && skb->next) + { + skb->next->prev = skb->prev; + skb->prev->next = skb->next; + skb->next = NULL; + skb->prev = NULL; + } +#ifdef PARANOID_BUGHUNT_MODE /* This is legal but we sometimes want to watch it */ + else + printk("skb_unlink: not a linked element\n"); +#endif + restore_flags(flags); +} + +#endif + +/* + * Free an sk_buff. This still knows about things it should + * not need to like protocols and sockets. + */ + +void kfree_skb(struct sk_buff *skb, int rw) +{ + if (skb == NULL) + { + printk("kfree_skb: skb = NULL (from %p)\n", + __builtin_return_address(0)); + return; + } +#ifdef CONFIG_SKB_CHECK + IS_SKB(skb); +#endif + if (skb->lock) + { + skb->free = 3; /* Free when unlocked */ + net_free_locked++; + return; + } + if (skb->free == 2) + printk("Warning: kfree_skb passed an skb that nobody set the free flag on! (from %p)\n", + __builtin_return_address(0)); + if (skb->next) + printk("Warning: kfree_skb passed an skb still on a list (from %p).\n", + __builtin_return_address(0)); + if (skb->sk) + { + if(skb->sk->prot!=NULL) + { + if (rw) + skb->sk->prot->rfree(skb->sk, skb, skb->mem_len); + else + skb->sk->prot->wfree(skb->sk, skb, skb->mem_len); + + } + else + { + unsigned long flags; + /* Non INET - default wmalloc/rmalloc handler */ + save_flags(flags); + cli(); + if (rw) + skb->sk->rmem_alloc-=skb->mem_len; + else + skb->sk->wmem_alloc-=skb->mem_len; + restore_flags(flags); + if(!skb->sk->dead) + skb->sk->write_space(skb->sk); + kfree_skbmem(skb,skb->mem_len); + } + } + else + kfree_skbmem(skb, skb->mem_len); +} + +/* + * Allocate a new skbuff. We do this ourselves so we can fill in a few 'private' + * fields and also do memory statistics to find all the [BEEP] leaks. 
+ */ +struct sk_buff *alloc_skb(unsigned int size,int priority) +{ + struct sk_buff *skb; + unsigned long flags; + + if (intr_count && priority!=GFP_ATOMIC) { + static int count = 0; + if (++count < 5) { + printk("alloc_skb called nonatomically from interrupt %p\n", + __builtin_return_address(0)); + priority = GFP_ATOMIC; + } + } + + size+=sizeof(struct sk_buff); + skb=(struct sk_buff *)kmalloc(size,priority); + if (skb == NULL) + { + net_fails++; + return NULL; + } +#ifdef PARANOID_BUGHUNT_MODE + if(skb->magic_debug_cookie == SK_GOOD_SKB) + printk("Kernel kmalloc handed us an existing skb (%p)\n",skb); +#endif + + net_allocs++; + + skb->free = 2; /* Invalid so we pick up forgetful users */ + skb->lock = 0; + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->truesize = size; + skb->mem_len = size; + skb->mem_addr = skb; +#ifdef CONFIG_SLAVE_BALANCING + skb->in_dev_queue = 0; +#endif + skb->fraglist = NULL; + skb->prev = skb->next = NULL; + skb->link3 = NULL; + skb->sk = NULL; + skb->localroute=0; + skb->stamp.tv_sec=0; /* No idea about time */ + skb->localroute = 0; + save_flags(flags); + cli(); + net_memory += size; + net_skbcount++; + restore_flags(flags); +#if CONFIG_SKB_CHECK + skb->magic_debug_cookie = SK_GOOD_SKB; +#endif + skb->users = 0; + return skb; +} + +/* + * Free an skbuff by memory + */ + +void kfree_skbmem(struct sk_buff *skb,unsigned size) +{ + unsigned long flags; +#ifdef CONFIG_SLAVE_BALANCING + save_flags(flags); + cli(); + if(skb->in_dev_queue && skb->dev!=NULL) + skb->dev->pkt_queue--; + restore_flags(flags); +#endif +#ifdef CONFIG_SKB_CHECK + IS_SKB(skb); + if(size!=skb->truesize) + printk("kfree_skbmem: size mismatch.\n"); + + if(skb->magic_debug_cookie == SK_GOOD_SKB) + { + save_flags(flags); + cli(); + IS_SKB(skb); + skb->magic_debug_cookie = SK_FREED_SKB; + kfree_s((void *)skb,size); + net_skbcount--; + net_memory -= size; + restore_flags(flags); + } + else + printk("kfree_skbmem: bad magic cookie\n"); +#else + save_flags(flags); + 
cli(); + kfree_s((void *)skb,size); + net_skbcount--; + net_memory -= size; + restore_flags(flags); +#endif +} + +/* + * Duplicate an sk_buff. The new one is not owned by a socket or locked + * and will be freed on deletion. + */ + +struct sk_buff *skb_clone(struct sk_buff *skb, int priority) +{ + struct sk_buff *n; + unsigned long offset; + + n=alloc_skb(skb->mem_len-sizeof(struct sk_buff),priority); + if(n==NULL) + return NULL; + + offset=((char *)n)-((char *)skb); + + memcpy(n->data,skb->data,skb->mem_len-sizeof(struct sk_buff)); + n->len=skb->len; + n->link3=NULL; + n->sk=NULL; + n->when=skb->when; + n->dev=skb->dev; + n->h.raw=skb->h.raw+offset; + n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset); + n->fraglen=skb->fraglen; + n->fraglist=skb->fraglist; + n->saddr=skb->saddr; + n->daddr=skb->daddr; + n->raddr=skb->raddr; + n->acked=skb->acked; + n->used=skb->used; + n->free=1; + n->arp=skb->arp; + n->tries=0; + n->lock=0; + n->users=0; + n->pkt_type=skb->pkt_type; + return n; +} + + +/* + * Skbuff device locking + */ + +void skb_device_lock(struct sk_buff *skb) +{ + if(skb->lock) + printk("double lock on device queue!\n"); + else + net_locked++; + skb->lock++; +} + +void skb_device_unlock(struct sk_buff *skb) +{ + if(skb->lock==0) + printk("double unlock on device queue!\n"); + skb->lock--; + if(skb->lock==0) + net_locked--; +} + +void dev_kfree_skb(struct sk_buff *skb, int mode) +{ + unsigned long flags; + + save_flags(flags); + cli(); + if(skb->lock==1) + net_locked--; + + if (!--skb->lock && (skb->free == 1 || skb->free == 3)) + { + restore_flags(flags); + kfree_skb(skb,mode); + } + else + restore_flags(flags); +} + +int skb_device_locked(struct sk_buff *skb) +{ + return skb->lock? 1 : 0; +} + diff --git a/pfinet/linux-inet/snmp.h b/pfinet/linux-inet/snmp.h new file mode 100644 index 00000000..552292be --- /dev/null +++ b/pfinet/linux-inet/snmp.h @@ -0,0 +1,107 @@ +/* + * + * SNMP MIB entries for the IP subsystem. 
+ * + * Alan Cox <gw4pts@gw4pts.ampr.org> + * + * We don't chose to implement SNMP in the kernel (this would + * be silly as SNMP is a pain in the backside in places). We do + * however need to collect the MIB statistics and export them + * out of /proc (eventually) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _SNMP_H +#define _SNMP_H + +/* + * We use all unsigned longs. Linux will soon be so reliable that even these + * will rapidly get too small 8-). Seriously consider the IpInReceives count + * on the 20Gb/s + networks people expect in a few years time! + */ + +struct ip_mib +{ + unsigned long IpForwarding; + unsigned long IpDefaultTTL; + unsigned long IpInReceives; + unsigned long IpInHdrErrors; + unsigned long IpInAddrErrors; + unsigned long IpForwDatagrams; + unsigned long IpInUnknownProtos; + unsigned long IpInDiscards; + unsigned long IpInDelivers; + unsigned long IpOutRequests; + unsigned long IpOutDiscards; + unsigned long IpOutNoRoutes; + unsigned long IpReasmTimeout; + unsigned long IpReasmReqds; + unsigned long IpReasmOKs; + unsigned long IpReasmFails; + unsigned long IpFragOKs; + unsigned long IpFragFails; + unsigned long IpFragCreates; +}; + + +struct icmp_mib +{ + unsigned long IcmpInMsgs; + unsigned long IcmpInErrors; + unsigned long IcmpInDestUnreachs; + unsigned long IcmpInTimeExcds; + unsigned long IcmpInParmProbs; + unsigned long IcmpInSrcQuenchs; + unsigned long IcmpInRedirects; + unsigned long IcmpInEchos; + unsigned long IcmpInEchoReps; + unsigned long IcmpInTimestamps; + unsigned long IcmpInTimestampReps; + unsigned long IcmpInAddrMasks; + unsigned long IcmpInAddrMaskReps; + unsigned long IcmpOutMsgs; + unsigned long IcmpOutErrors; + unsigned long IcmpOutDestUnreachs; + unsigned long IcmpOutTimeExcds; + 
unsigned long IcmpOutParmProbs; + unsigned long IcmpOutSrcQuenchs; + unsigned long IcmpOutRedirects; + unsigned long IcmpOutEchos; + unsigned long IcmpOutEchoReps; + unsigned long IcmpOutTimestamps; + unsigned long IcmpOutTimestampReps; + unsigned long IcmpOutAddrMasks; + unsigned long IcmpOutAddrMaskReps; +}; + +struct tcp_mib +{ + unsigned long TcpRtoAlgorithm; + unsigned long TcpRtoMin; + unsigned long TcpRtoMax; + unsigned long TcpMaxConn; + unsigned long TcpActiveOpens; + unsigned long TcpPassiveOpens; + unsigned long TcpAttemptFails; + unsigned long TcpEstabResets; + unsigned long TcpCurrEstab; + unsigned long TcpInSegs; + unsigned long TcpOutSegs; + unsigned long TcpRetransSegs; +}; + +struct udp_mib +{ + unsigned long UdpInDatagrams; + unsigned long UdpNoPorts; + unsigned long UdpInErrors; + unsigned long UdpOutDatagrams; +}; + + +#endif diff --git a/pfinet/linux-inet/sock.c b/pfinet/linux-inet/sock.c new file mode 100644 index 00000000..40d4a8f4 --- /dev/null +++ b/pfinet/linux-inet/sock.c @@ -0,0 +1,574 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic socket support routines. Memory allocators, sk->inuse/release + * handler for protocols to use and generic option handler. + * + * + * Version: @(#)sock.c 1.0.17 06/02/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Florian La Roche, <flla@stud.uni-sb.de> + * Alan Cox, <A.Cox@swansea.ac.uk> + * + * Fixes: + * Alan Cox : Numerous verify_area() problems + * Alan Cox : Connecting on a connecting socket + * now returns an error for tcp. + * Alan Cox : sock->protocol is set correctly. + * and is not sometimes left as 0. + * Alan Cox : connect handles icmp errors on a + * connect properly. Unfortunately there + * is a restart syscall nasty there. 
I + * can't match BSD without hacking the C + * library. Ideas urgently sought! + * Alan Cox : Disallow bind() to addresses that are + * not ours - especially broadcast ones!! + * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) + * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, + * instead they leave that for the DESTROY timer. + * Alan Cox : Clean up error flag in accept + * Alan Cox : TCP ack handling is buggy, the DESTROY timer + * was buggy. Put a remove_sock() in the handler + * for memory when we hit 0. Also altered the timer + * code. The ACK stuff can wait and needs major + * TCP layer surgery. + * Alan Cox : Fixed TCP ack bug, removed remove sock + * and fixed timer/inet_bh race. + * Alan Cox : Added zapped flag for TCP + * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code + * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb + * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources + * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. + * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... + * Rick Sladkey : Relaxed UDP rules for matching packets. + * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support + * Pauline Middelink : identd support + * Alan Cox : Fixed connect() taking signals I think. + * Alan Cox : SO_LINGER supported + * Alan Cox : Error reporting fixes + * Anonymous : inet_create tidied up (sk->reuse setting) + * Alan Cox : inet sockets don't set sk->type! 
+ * Alan Cox : Split socket option code + * Alan Cox : Callbacks + * Alan Cox : Nagle flag for Charles & Johannes stuff + * Alex : Removed restriction on inet fioctl + * Alan Cox : Splitting INET from NET core + * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() + * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code + * Alan Cox : Split IP from generic code + * Alan Cox : New kfree_skbmem() + * Alan Cox : Make SO_DEBUG superuser only. + * Alan Cox : Allow anyone to clear SO_DEBUG + * (compatibility fix) + * + * To Fix: + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "ip.h" +#include "protocol.h" +#include "arp.h" +#include "rarp.h" +#include "route.h" +#include "tcp.h" +#include "udp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "raw.h" +#include "icmp.h" + +#define min(a,b) ((a)<(b)?(a):(b)) + +/* + * This is meant for all protocols to use and covers goings on + * at the socket level. Everything here is generic. 
+ */ + +int sock_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + int val; + int err; + struct linger ling; + + if (optval == NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ, optval, sizeof(int)); + if(err) + return err; + + val = get_fs_long((unsigned long *)optval); + switch(optname) + { + case SO_TYPE: + case SO_ERROR: + return(-ENOPROTOOPT); + + case SO_DEBUG: + if(val && !suser()) + return(-EPERM); + sk->debug=val?1:0; + return 0; + case SO_DONTROUTE: + sk->localroute=val?1:0; + return 0; + case SO_BROADCAST: + sk->broadcast=val?1:0; + return 0; + case SO_SNDBUF: + if(val>32767) + val=32767; + if(val<256) + val=256; + sk->sndbuf=val; + return 0; + case SO_LINGER: + err=verify_area(VERIFY_READ,optval,sizeof(ling)); + if(err) + return err; + memcpy_fromfs(&ling,optval,sizeof(ling)); + if(ling.l_onoff==0) + sk->linger=0; + else + { + sk->lingertime=ling.l_linger; + sk->linger=1; + } + return 0; + case SO_RCVBUF: + if(val>32767) + val=32767; + if(val<256) + val=256; + sk->rcvbuf=val; + return(0); + + case SO_REUSEADDR: + if (val) + sk->reuse = 1; + else + sk->reuse = 0; + return(0); + + case SO_KEEPALIVE: + if (val) + sk->keepopen = 1; + else + sk->keepopen = 0; + return(0); + + case SO_OOBINLINE: + if (val) + sk->urginline = 1; + else + sk->urginline = 0; + return(0); + + case SO_NO_CHECK: + if (val) + sk->no_check = 1; + else + sk->no_check = 0; + return(0); + + case SO_PRIORITY: + if (val >= 0 && val < DEV_NUMBUFFS) + { + sk->priority = val; + } + else + { + return(-EINVAL); + } + return(0); + + default: + return(-ENOPROTOOPT); + } +} + + +int sock_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + int val; + int err; + struct linger ling; + + switch(optname) + { + case SO_DEBUG: + val = sk->debug; + break; + + case SO_DONTROUTE: + val = sk->localroute; + break; + + case SO_BROADCAST: + val= sk->broadcast; + break; + + case SO_LINGER: + 
err=verify_area(VERIFY_WRITE,optval,sizeof(ling)); + if(err) + return err; + err=verify_area(VERIFY_WRITE,optlen,sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(ling),(unsigned long *)optlen); + ling.l_onoff=sk->linger; + ling.l_linger=sk->lingertime; + memcpy_tofs(optval,&ling,sizeof(ling)); + return 0; + + case SO_SNDBUF: + val=sk->sndbuf; + break; + + case SO_RCVBUF: + val =sk->rcvbuf; + break; + + case SO_REUSEADDR: + val = sk->reuse; + break; + + case SO_KEEPALIVE: + val = sk->keepopen; + break; + + case SO_TYPE: +#if 0 + if (sk->prot == &tcp_prot) + val = SOCK_STREAM; + else + val = SOCK_DGRAM; +#endif + val = sk->type; + break; + + case SO_ERROR: + val = sk->err; + sk->err = 0; + break; + + case SO_OOBINLINE: + val = sk->urginline; + break; + + case SO_NO_CHECK: + val = sk->no_check; + break; + + case SO_PRIORITY: + val = sk->priority; + break; + + default: + return(-ENOPROTOOPT); + } + err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *) optlen); + + err=verify_area(VERIFY_WRITE, optval, sizeof(int)); + if(err) + return err; + put_fs_long(val,(unsigned long *)optval); + + return(0); +} + + +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +{ + if (sk) + { + if (sk->wmem_alloc + size < sk->sndbuf || force) + { + struct sk_buff * c = alloc_skb(size, priority); + if (c) + { + unsigned long flags; + save_flags(flags); + cli(); + sk->wmem_alloc+= c->mem_len; + restore_flags(flags); /* was sti(); */ + } + return c; + } + return(NULL); + } + return(alloc_skb(size, priority)); +} + + +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +{ + if (sk) + { + if (sk->rmem_alloc + size < sk->rcvbuf || force) + { + struct sk_buff *c = alloc_skb(size, priority); + if (c) + { + unsigned long flags; + save_flags(flags); + cli(); + sk->rmem_alloc += c->mem_len; + restore_flags(flags); /* was sti(); */ + } + return(c); + 
} + return(NULL); + } + return(alloc_skb(size, priority)); +} + + +unsigned long sock_rspace(struct sock *sk) +{ + int amt; + + if (sk != NULL) + { + if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW) + return(0); + amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW); + if (amt < 0) + return(0); + return(amt); + } + return(0); +} + + +unsigned long sock_wspace(struct sock *sk) +{ + if (sk != NULL) + { + if (sk->shutdown & SEND_SHUTDOWN) + return(0); + if (sk->wmem_alloc >= sk->sndbuf) + return(0); + return(sk->sndbuf-sk->wmem_alloc ); + } + return(0); +} + + +void sock_wfree(struct sock *sk, struct sk_buff *skb, unsigned long size) +{ +#ifdef CONFIG_SKB_CHECK + IS_SKB(skb); +#endif + kfree_skbmem(skb, size); + if (sk) + { + unsigned long flags; + save_flags(flags); + cli(); + sk->wmem_alloc -= size; + restore_flags(flags); + /* In case it might be waiting for more memory. */ + if (!sk->dead) + sk->write_space(sk); + return; + } +} + + +void sock_rfree(struct sock *sk, struct sk_buff *skb, unsigned long size) +{ +#ifdef CONFIG_SKB_CHECK + IS_SKB(skb); +#endif + kfree_skbmem(skb, size); + if (sk) + { + unsigned long flags; + save_flags(flags); + cli(); + sk->rmem_alloc -= size; + restore_flags(flags); + } +} + +/* + * Generic send/receive buffer handlers + */ + +struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) +{ + struct sk_buff *skb; + int err; + + sk->inuse=1; + + do + { + if(sk->err!=0) + { + cli(); + err= -sk->err; + sk->err=0; + sti(); + *errcode=err; + return NULL; + } + + if(sk->shutdown&SEND_SHUTDOWN) + { + *errcode=-EPIPE; + return NULL; + } + + skb = sock_wmalloc(sk, size, 0, GFP_KERNEL); + + if(skb==NULL) + { + unsigned long tmp; + + sk->socket->flags |= SO_NOSPACE; + if(noblock) + { + *errcode=-EAGAIN; + return NULL; + } + if(sk->shutdown&SEND_SHUTDOWN) + { + *errcode=-EPIPE; + return NULL; + } + tmp = sk->wmem_alloc; + cli(); + if(sk->shutdown&SEND_SHUTDOWN) + { + sti(); + *errcode=-EPIPE; + 
return NULL; + } + + if( tmp <= sk->wmem_alloc) + { + sk->socket->flags &= ~SO_NOSPACE; + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + *errcode = -ERESTARTSYS; + return NULL; + } + } + sti(); + } + } + while(skb==NULL); + + return skb; +} + +/* + * Queue a received datagram if it will fit. Stream and sequenced protocols + * can't normally use this as they need to fit buffers in and play with them. + */ + +int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + unsigned long flags; + if(sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) + return -ENOMEM; + save_flags(flags); + cli(); + sk->rmem_alloc+=skb->mem_len; + skb->sk=sk; + restore_flags(flags); + skb_queue_tail(&sk->receive_queue,skb); + if(!sk->dead) + sk->data_ready(sk,skb->len); + return 0; +} + +void release_sock(struct sock *sk) +{ + unsigned long flags; +#ifdef CONFIG_INET + struct sk_buff *skb; +#endif + + if (!sk->prot) + return; + /* + * Make the backlog atomic. If we don't do this there is a tiny + * window where a packet may arrive between the sk->blog being + * tested and then set with sk->inuse still 0 causing an extra + * unwanted re-entry into release_sock(). + */ + + save_flags(flags); + cli(); + if (sk->blog) + { + restore_flags(flags); + return; + } + sk->blog=1; + sk->inuse = 1; + restore_flags(flags); +#ifdef CONFIG_INET + /* See if we have any packets built up. */ + while((skb = skb_dequeue(&sk->back_log)) != NULL) + { + sk->blog = 1; + if (sk->prot->rcv) + sk->prot->rcv(skb, skb->dev, sk->opt, + skb->saddr, skb->len, skb->daddr, 1, + /* Only used for/by raw sockets. 
*/ + (struct inet_protocol *)sk->pair); + } +#endif + sk->blog = 0; + sk->inuse = 0; +#ifdef CONFIG_INET + if (sk->dead && sk->state == TCP_CLOSE) + { + /* Should be about 2 rtt's */ + reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME)); + } +#endif +} + + diff --git a/pfinet/linux-inet/sock.h b/pfinet/linux-inet/sock.h new file mode 100644 index 00000000..2005745e --- /dev/null +++ b/pfinet/linux-inet/sock.h @@ -0,0 +1,316 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the AF_INET socket handler. + * + * Version: @(#)sock.h 1.0.4 05/13/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche <flla@stud.uni-sb.de> + * + * Fixes: + * Alan Cox : Volatiles in skbuff pointers. See + * skbuff comments. May be overdone, + * better to prove they can be removed + * than the reverse. + * Alan Cox : Added a zapped field for tcp to note + * a socket is reset and must stay shut up + * Alan Cox : New fields for options + * Pauline Middelink : identd support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _SOCK_H +#define _SOCK_H + +#include <linux/timer.h> +#include <linux/ip.h> /* struct options */ +#include <linux/tcp.h> /* struct tcphdr */ +#include <linux/config.h> + +#include <linux/skbuff.h> /* struct sk_buff */ +#include "protocol.h" /* struct inet_protocol */ +#ifdef CONFIG_AX25 +#include "ax25.h" +#endif +#ifdef CONFIG_IPX +#include "ipx.h" +#endif +#ifdef CONFIG_ATALK +#include <linux/atalk.h> +#endif + +#include <linux/igmp.h> + +#define SOCK_ARRAY_SIZE 256 /* Think big (also on some systems a byte is faster) */ + + +/* + * This structure really needs to be cleaned up. + * Most of it is for TCP, and not used by any of + * the other protocols. + */ +struct sock { + struct options *opt; + volatile unsigned long wmem_alloc; + volatile unsigned long rmem_alloc; + unsigned long write_seq; + unsigned long sent_seq; + unsigned long acked_seq; + unsigned long copied_seq; + unsigned long rcv_ack_seq; + unsigned long window_seq; + unsigned long fin_seq; + unsigned long urg_seq; + unsigned long urg_data; + + /* + * Not all are volatile, but some are, so we + * might as well say they all are. + */ + volatile char inuse, + dead, + urginline, + intr, + blog, + done, + reuse, + keepopen, + linger, + delay_acks, + destroy, + ack_timed, + no_check, + zapped, /* In ax25 & ipx means not linked */ + broadcast, + nonagle; + unsigned long lingertime; + int proc; + struct sock *next; + struct sock *prev; /* Doubly linked chain.. 
*/ + struct sock *pair; + struct sk_buff * volatile send_head; + struct sk_buff * volatile send_tail; + struct sk_buff_head back_log; + struct sk_buff *partial; + struct timer_list partial_timer; + long retransmits; + struct sk_buff_head write_queue, + receive_queue; + struct proto *prot; + struct wait_queue **sleep; + unsigned long daddr; + unsigned long saddr; + unsigned short max_unacked; + unsigned short window; + unsigned short bytes_rcv; +/* mss is min(mtu, max_window) */ + unsigned short mtu; /* mss negotiated in the syn's */ + volatile unsigned short mss; /* current eff. mss - can change */ + volatile unsigned short user_mss; /* mss requested by user in ioctl */ + volatile unsigned short max_window; + unsigned long window_clamp; + unsigned short num; + volatile unsigned short cong_window; + volatile unsigned short cong_count; + volatile unsigned short ssthresh; + volatile unsigned short packets_out; + volatile unsigned short shutdown; + volatile unsigned long rtt; + volatile unsigned long mdev; + volatile unsigned long rto; +/* currently backoff isn't used, but I'm maintaining it in case + * we want to go back to a backoff formula that needs it + */ + volatile unsigned short backoff; + volatile error_t err; /* Note change XXX HURD */ + unsigned char protocol; + volatile unsigned char state; + volatile unsigned char ack_backlog; + unsigned char max_ack_backlog; + unsigned char priority; + unsigned char debug; + unsigned short rcvbuf; + unsigned short sndbuf; + unsigned short type; + unsigned char localroute; /* Route locally only */ +#ifdef CONFIG_IPX + ipx_address ipx_dest_addr; + ipx_interface *ipx_intrfc; + unsigned short ipx_port; + unsigned short ipx_type; +#endif +#ifdef CONFIG_AX25 +/* Really we want to add a per protocol private area */ + ax25_address ax25_source_addr,ax25_dest_addr; + struct sk_buff *volatile ax25_retxq[8]; + char ax25_state,ax25_vs,ax25_vr,ax25_lastrxnr,ax25_lasttxnr; + char ax25_condition; + char ax25_retxcnt; + char ax25_xx; + 
char ax25_retxqi; + char ax25_rrtimer; + char ax25_timer; + unsigned char ax25_n2; + unsigned short ax25_t1,ax25_t2,ax25_t3; + ax25_digi *ax25_digipeat; +#endif +#ifdef CONFIG_ATALK + struct atalk_sock at; +#endif + +/* IP 'private area' or will be eventually */ + int ip_ttl; /* TTL setting */ + int ip_tos; /* TOS */ + struct tcphdr dummy_th; + struct timer_list keepalive_timer; /* TCP keepalive hack */ + struct timer_list retransmit_timer; /* TCP retransmit timer */ + struct timer_list ack_timer; /* TCP delayed ack timer */ + int ip_xmit_timeout; /* Why the timeout is running */ +#ifdef CONFIG_IP_MULTICAST + int ip_mc_ttl; /* Multicasting TTL */ + int ip_mc_loop; /* Loopback (not implemented yet) */ + char ip_mc_name[MAX_ADDR_LEN]; /* Multicast device name */ + struct ip_mc_socklist *ip_mc_list; /* Group array */ +#endif + + /* This part is used for the timeout functions (timer.c). */ + int timeout; /* What are we waiting for? */ + struct timer_list timer; /* This is the TIME_WAIT/receive timer when we are doing IP */ + struct timeval stamp; + + /* identd */ + struct socket *socket; + + /* Callbacks */ + void (*state_change)(struct sock *sk); + void (*data_ready)(struct sock *sk,int bytes); + void (*write_space)(struct sock *sk); + void (*error_report)(struct sock *sk); + +}; +/* + * Per-protocol operations table, reached from a socket through sk->prot. + * Besides the function pointers it carries the protocol's socket table + * (sock_array), its name and its usage counters. + */ +struct proto { + struct sk_buff * (*wmalloc)(struct sock *sk, + unsigned long size, int force, + int priority); + struct sk_buff * (*rmalloc)(struct sock *sk, + unsigned long size, int force, + int priority); + void (*wfree)(struct sock *sk, struct sk_buff *skb, + unsigned long size); + void (*rfree)(struct sock *sk, struct sk_buff *skb, + unsigned long size); + unsigned long (*rspace)(struct sock *sk); + unsigned long (*wspace)(struct sock *sk); + void (*close)(struct sock *sk, int timeout); + int (*read)(struct sock *sk, unsigned char *to, + int len, int nonblock, unsigned flags); + int (*write)(struct sock *sk, unsigned char *to, + int len, int nonblock, unsigned flags); + int 
(*sendto)(struct sock *sk, + unsigned char *from, int len, int noblock, + unsigned flags, struct sockaddr_in *usin, + int addr_len); + int (*recvfrom)(struct sock *sk, + unsigned char *from, int len, int noblock, + unsigned flags, struct sockaddr_in *usin, + int *addr_len); + int (*build_header)(struct sk_buff *skb, + unsigned long saddr, + unsigned long daddr, + struct device **dev, int type, + struct options *opt, int len, int tos, int ttl); + int (*connect)(struct sock *sk, + struct sockaddr_in *usin, int addr_len); + struct sock * (*accept) (struct sock *sk, int flags); + void (*queue_xmit)(struct sock *sk, + struct device *dev, struct sk_buff *skb, + int free); + void (*retransmit)(struct sock *sk, int all); + void (*write_wakeup)(struct sock *sk); + void (*read_wakeup)(struct sock *sk); + int (*rcv)(struct sk_buff *buff, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, + int redo, struct inet_protocol *protocol); + int (*select)(struct sock *sk, int which, + select_table *wait); + int (*ioctl)(struct sock *sk, int cmd, + unsigned long arg); + int (*init)(struct sock *sk); + void (*shutdown)(struct sock *sk, int how); + int (*setsockopt)(struct sock *sk, int level, int optname, + char *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, int optname, + char *optval, int *option); + unsigned short max_header; + unsigned long retransmits; + struct sock * sock_array[SOCK_ARRAY_SIZE]; + char name[80]; + int inuse, highestinuse; +}; + +#define TIME_WRITE 1 +#define TIME_CLOSE 2 +#define TIME_KEEPOPEN 3 +#define TIME_DESTROY 4 +#define TIME_DONE 5 /* used to absorb those last few packets */ +#define TIME_PROBE0 6 +#define SOCK_DESTROY_TIME 1000 /* about 10 seconds */ + +#define PROT_SOCK 1024 /* Sockets 0-1023 can't be bound to unless you are superuser */ + +#define SHUTDOWN_MASK 3 +#define RCV_SHUTDOWN 1 +#define SEND_SHUTDOWN 2 + + +extern void destroy_sock(struct sock *sk); +extern 
unsigned short get_new_socknum(struct proto *, unsigned short); +extern void put_sock(unsigned short, struct sock *); +extern void release_sock(struct sock *sk); +extern struct sock *get_sock(struct proto *, unsigned short, + unsigned long, unsigned short, + unsigned long); +extern struct sock *get_sock_mcast(struct sock *, unsigned short, + unsigned long, unsigned short, + unsigned long); +extern struct sock *get_sock_raw(struct sock *, unsigned short, + unsigned long, unsigned long); + +extern struct sk_buff *sock_wmalloc(struct sock *sk, + unsigned long size, int force, + int priority); +extern struct sk_buff *sock_rmalloc(struct sock *sk, + unsigned long size, int force, + int priority); +extern void sock_wfree(struct sock *sk, struct sk_buff *skb, + unsigned long size); +extern void sock_rfree(struct sock *sk, struct sk_buff *skb, + unsigned long size); +extern unsigned long sock_rspace(struct sock *sk); +extern unsigned long sock_wspace(struct sock *sk); + +extern int sock_setsockopt(struct sock *sk,int level,int op,char *optval,int optlen); + +extern int sock_getsockopt(struct sock *sk,int level,int op,char *optval,int *optlen); +extern struct sk_buff *sock_alloc_send_skb(struct sock *skb, unsigned long size, int noblock, int *errcode); +extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* declarations from timer.c */ +extern struct sock *timer_base; + +void delete_timer (struct sock *); +void reset_timer (struct sock *, int, unsigned long); +void net_timer (unsigned long); + + +#endif /* _SOCK_H */ diff --git a/pfinet/linux-inet/tcp.c b/pfinet/linux-inet/tcp.c new file mode 100644 index 00000000..3005fbfd --- /dev/null +++ b/pfinet/linux-inet/tcp.c @@ -0,0 +1,5121 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). 
+ * + * Version: @(#)tcp.c 1.0.16 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Mark Evans, <evansmp@uhura.aston.ac.uk> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Florian La Roche, <flla@stud.uni-sb.de> + * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> + * Linus Torvalds, <torvalds@cs.helsinki.fi> + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Matthew Dillon, <dillon@apollo.west.oic.com> + * Arnt Gulbrandsen, <agulbra@no.unit.nvg> + * + * Fixes: + * Alan Cox : Numerous verify_area() calls + * Alan Cox : Set the ACK bit on a reset + * Alan Cox : Stopped it crashing if it closed while sk->inuse=1 + * and was trying to connect (tcp_err()). + * Alan Cox : All icmp error handling was broken + * pointers passed where wrong and the + * socket was looked up backwards. Nobody + * tested any icmp error code obviously. + * Alan Cox : tcp_err() now handled properly. It wakes people + * on errors. select behaves and the icmp error race + * has gone by moving it into sock.c + * Alan Cox : tcp_reset() fixed to work for everything not just + * packets for unknown sockets. + * Alan Cox : tcp option processing. + * Alan Cox : Reset tweaked (still not 100%) [Had syn rule wrong] + * Herp Rosmanith : More reset fixes + * Alan Cox : No longer acks invalid rst frames. Acking + * any kind of RST is right out. + * Alan Cox : Sets an ignore me flag on an rst receive + * otherwise odd bits of prattle escape still + * Alan Cox : Fixed another acking RST frame bug. Should stop + * LAN workplace lockups. + * Alan Cox : Some tidyups using the new skb list facilities + * Alan Cox : sk->keepopen now seems to work + * Alan Cox : Pulls options out correctly on accepts + * Alan Cox : Fixed assorted sk->rqueue->next errors + * Alan Cox : PSH doesn't end a TCP read. Switched a bit to skb ops. + * Alan Cox : Tidied tcp_data to avoid a potential nasty. 
+ * Alan Cox : Added some better commenting, as the tcp is hard to follow + * Alan Cox : Removed incorrect check for 20 * psh + * Michael O'Reilly : ack < copied bug fix. + * Johannes Stille : Misc tcp fixes (not all in yet). + * Alan Cox : FIN with no memory -> CRASH + * Alan Cox : Added socket option proto entries. Also added awareness of them to accept. + * Alan Cox : Added TCP options (SOL_TCP) + * Alan Cox : Switched wakeup calls to callbacks, so the kernel can layer network sockets. + * Alan Cox : Use ip_tos/ip_ttl settings. + * Alan Cox : Handle FIN (more) properly (we hope). + * Alan Cox : RST frames sent on unsynchronised state ack error/ + * Alan Cox : Put in missing check for SYN bit. + * Alan Cox : Added tcp_select_window() aka NET2E + * window non shrink trick. + * Alan Cox : Added a couple of small NET2E timer fixes + * Charles Hedrick : TCP fixes + * Toomas Tamm : TCP window fixes + * Alan Cox : Small URG fix to rlogin ^C ack fight + * Charles Hedrick : Rewrote most of it to actually work + * Linus : Rewrote tcp_read() and URG handling + * completely + * Gerhard Koerting: Fixed some missing timer handling + * Matthew Dillon : Reworked TCP machine states as per RFC + * Gerhard Koerting: PC/TCP workarounds + * Adam Caldwell : Assorted timer/timing errors + * Matthew Dillon : Fixed another RST bug + * Alan Cox : Move to kernel side addressing changes. + * Alan Cox : Beginning work on TCP fastpathing (not yet usable) + * Arnt Gulbrandsen: Turbocharged tcp_check() routine. + * Alan Cox : TCP fast path debugging + * Alan Cox : Window clamping + * Michael Riepe : Bug in tcp_check() + * Matt Dillon : More TCP improvements and RST bug fixes + * Matt Dillon : Yet more small nasties remove from the TCP code + * (Be very nice to this man if tcp finally works 100%) 8) + * Alan Cox : BSD accept semantics. + * Alan Cox : Reset on closedown bug. + * Peter De Schrijver : ENOTCONN check missing in tcp_sendto(). 
+ * Michael Pall : Handle select() after URG properly in all cases. + * Michael Pall : Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin). + * Michael Pall : Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now. + * Michael Pall : recv(...,MSG_OOB) never blocks in the BSD api. + * Alan Cox : Changed the semantics of sk->socket to + * fix a race and a signal problem with + * accept() and async I/O. + * Alan Cox : Relaxed the rules on tcp_sendto(). + * Yury Shevchuk : Really fixed accept() blocking problem. + * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for + * clients/servers which listen in on + * fixed ports. + * Alan Cox : Cleaned the above up and shrank it to + * a sensible code size. + * Alan Cox : Self connect lockup fix. + * Alan Cox : No connect to multicast. + * Ross Biro : Close unaccepted children on master + * socket close. + * Alan Cox : Reset tracing code. + * Alan Cox : Spurious resets on shutdown. + * Alan Cox : Giant 15 minute/60 second timer error + * Alan Cox : Small whoops in selecting before an accept. + * Alan Cox : Kept the state trace facility since it's + * handy for debugging. + * Alan Cox : More reset handler fixes. + * Alan Cox : Started rewriting the code based on the RFC's + * for other useful protocol references see: + * Comer, KA9Q NOS, and for a reference on the + * difference between specifications and how BSD + * works see the 4.4lite source. + * A.N.Kuznetsov : Don't time wait on completion of tidy + * close. + * Linus Torvalds : Fin/Shutdown & copied_seq changes. + * Linus Torvalds : Fixed BSD port reuse to work first syn + * Alan Cox : Reimplemented timers as per the RFC and using multiple + * timers for sanity. + * Alan Cox : Small bug fixes, and a lot of new + * comments. + * Alan Cox : Fixed dual reader crash by locking + * the buffers (much like datagram.c) + * Alan Cox : Fixed stuck sockets in probe. 
A probe + * now gets fed up of retrying without + * (even a no space) answer. + * Alan Cox : Extracted closing code better + * Alan Cox : Fixed the closing state machine to + * resemble the RFC. + * Alan Cox : More 'per spec' fixes. + * Alan Cox : tcp_data() doesn't ack illegal PSH + * only frames. At least one pc tcp stack + * generates them. + * + * + * To Fix: + * Fast path the code. Two things here - fix the window calculation + * so it doesn't iterate over the queue, also spot packets with no funny + * options arriving in order and process directly. + * + * Implement RFC 1191 [Path MTU discovery] + * Look at the effect of implementing RFC 1337 suggestions and their impact. + * Rewrite output state machine to use a single queue and do low window + * situations as per the spec (RFC 1122) + * Speed up input assembly algorithm. + * RFC1323 - PAWS and window scaling. PAWS is required for IPv6 so we + * could do with it working on IPv4 + * User settable/learned rtt/max window/mtu + * Cope with MTU/device switches when retransmitting in tcp. + * Fix the window handling to use PR's new code. + * + * Change the fundamental structure to a single send queue maintained + * by TCP (removing the bogus ip stuff [thus fixing mtu drops on + * active routes too]). Cut the queue off in tcp_retransmit/ + * tcp_transmit. + * Change the receive queue to assemble as it goes. This lets us + * dispose of most of tcp_sequence, half of tcp_ack and chunks of + * tcp_data/tcp_read as well as the window shrink crud. + * Separate out duplicated code - tcp_alloc_skb, tcp_build_ack + * tcp_queue_skb seem obvious routines to extract. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. 
+ * + * Description of States: + * + * TCP_SYN_SENT sent a connection request, waiting for ack + * + * TCP_SYN_RECV received a connection request, sent ack, + * waiting for final ack in three-way handshake. + * + * TCP_ESTABLISHED connection established + * + * TCP_FIN_WAIT1 our side has shutdown, waiting to complete + * transmission of remaining buffered data + * + * TCP_FIN_WAIT2 all buffered data sent, waiting for remote + * to shutdown + * + * TCP_CLOSING both sides have shutdown but we still have + * data we have to finish sending + * + * TCP_TIME_WAIT timeout to catch resent junk before entering + * closed, can only be entered from FIN_WAIT2 + * or CLOSING. Required because the other end + * may not have gotten our last ACK causing it + * to retransmit the data packet (which we ignore) + * + * TCP_CLOSE_WAIT remote side has shutdown and is waiting for + * us to finish writing our data and to shutdown + * (we have to close() to move on to LAST_ACK) + * + * TCP_LAST_ACK our side has shutdown after remote has + * shutdown. There may still be data in our + * buffer that we have to finish sending + * + * TCP_CLOSE socket is finished + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/termios.h> +#include <linux/in.h> +#include <linux/fcntl.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include "snmp.h" +#include "ip.h" +#include "protocol.h" +#include "icmp.h" +#include "tcp.h" +#include "arp.h" +#include <linux/skbuff.h> +#include "sock.h" +#include "route.h" +#include <linux/errno.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <asm/segment.h> +#include <linux/mm.h> + +/* + * The MSL timer is the 'normal' timer. 
+ */ + +#define reset_msl_timer(x,y,z) reset_timer(x,y,z) + +#define SEQ_TICK 3 +unsigned long seq_offset; +struct tcp_mib tcp_statistics; + +static void tcp_close(struct sock *sk, int timeout); + + +/* + * The less said about this the better, but it works and will do for 1.2 + */ + +static struct wait_queue *master_select_wakeup; +/* + * Return the smaller of two unsigned values. Note that the result is + * handed back as a signed int. + */ +static __inline__ int min(unsigned int a, unsigned int b) +{ + if (a < b) + return(a); + return(b); +} + +#undef STATE_TRACE + +#ifdef STATE_TRACE +static char *statename[]={ + "Unused","Established","Syn Sent","Syn Recv", + "Fin Wait 1","Fin Wait 2","Time Wait", "Close", + "Close Wait","Last ACK","Listen","Closing" +}; +#endif +/* + * Move sk to a new TCP state. tcp_statistics.TcpCurrEstab is decremented + * when TCP_ESTABLISHED is left and incremented when it is entered, and + * master_select_wakeup is woken when a TCP_SYN_RECV socket becomes + * TCP_ESTABLISHED. + */ +static __inline__ void tcp_set_state(struct sock *sk, int state) +{ + if(sk->state==TCP_ESTABLISHED) + tcp_statistics.TcpCurrEstab--; +#ifdef STATE_TRACE + if(sk->debug) + printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]); +#endif + /* This is a hack but it doesn't occur often and it's going to + be a real pain to fix nicely */ + + if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV) + { + wake_up_interruptible(&master_select_wakeup); + } + sk->state=state; + if(state==TCP_ESTABLISHED) + tcp_statistics.TcpCurrEstab++; +} + +/* + * This routine picks a TCP window for a socket based on + * the following constraints + * + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket + * + * For now we use NET2E3's heuristic of offering half the memory + * we have handy. All is not as bad as this seems however because + * of two things. Firstly we will bin packets even within the window + * in order to get the data we are waiting for into the memory limit. + * Secondly we bin common duplicate forms at receive time + * Better heuristics welcome + */ + +int tcp_select_window(struct sock *sk) +{ + int new_window = sk->prot->rspace(sk); + + if(sk->window_clamp) + new_window=min(sk->window_clamp,new_window); + /* + * Two things are going on here. 
First, we don't ever offer a + * window less than min(sk->mss, MAX_WINDOW/2). This is the + * receiver side of SWS as specified in RFC1122. + * Second, we always give them at least the window they + * had before, in order to avoid retracting window. This + * is technically allowed, but RFC1122 advises against it and + * in practice it causes trouble. + * + * Fixme: This doesn't correctly handle the case where + * new_window > sk->window but not by enough to allow for the + * shift in sequence space. + */ + if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window) + return(sk->window); + return(new_window); +} + +/* + * Find someone to 'accept'. Must be called with + * sk->inuse=1 or cli() + * + * Returns the first skb on the receive queue whose socket is in + * TCP_ESTABLISHED or in a state numbered TCP_FIN_WAIT1 or higher, + * or NULL when there is none. + */ + +static struct sk_buff *tcp_find_established(struct sock *s) +{ + struct sk_buff *p=skb_peek(&s->receive_queue); + if(p==NULL) + return NULL; + do + { + if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1) + return p; + p=p->next; + } + while(p!=(struct sk_buff *)&s->receive_queue); + return NULL; +} + +/* + * Remove a completed connection and return it. This is used by + * tcp_accept() to get connections from the queue. + * Returns NULL when no such connection is queued. + */ + +static struct sk_buff *tcp_dequeue_established(struct sock *s) +{ + struct sk_buff *skb; + unsigned long flags; + save_flags(flags); + cli(); + skb=tcp_find_established(s); + if(skb!=NULL) + skb_unlink(skb); /* Take it off the queue */ + restore_flags(flags); + return skb; +} + +/* + * This routine closes sockets which have been at least partially + * opened, but not yet accepted. Currently it is only called by + * tcp_close. Each pending socket is marked dead and closed with a + * timeout of 0, then the skb that carried it is freed. + */ + +static void tcp_close_pending (struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) + { + skb->sk->dead=1; + tcp_close(skb->sk, 0); + kfree_skb(skb, FREE_READ); + } + return; +} + +/* + * Enter the time wait state. 
+ */ + +static void tcp_time_wait(struct sock *sk) +{ + tcp_set_state(sk,TCP_TIME_WAIT); + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + sk->state_change(sk); + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); +} + +/* + * A socket has timed out on its send queue and wants to do a + * little retransmitting. Currently this means TCP. + * + * With all zero only the skb at sk->send_head is resent. Otherwise the + * walk follows skb->link3 and stops once the number of packets handled + * reaches sk->cong_window. + */ + +void tcp_do_retransmit(struct sock *sk, int all) +{ + struct sk_buff * skb; + struct proto *prot; + struct device *dev; + int ct=0; + + prot = sk->prot; + skb = sk->send_head; + + while (skb != NULL) + { + struct tcphdr *th; + struct iphdr *iph; + int size; + + dev = skb->dev; + IS_SKB(skb); + skb->when = jiffies; + + /* + * In general it's OK just to use the old packet. However we + * need to use the current ack and window fields. Urg and + * urg_ptr could possibly stand to be updated as well, but we + * don't keep the necessary data. That shouldn't be a problem, + * if the other end is doing the right thing. Since we're + * changing the packet, we have to issue a new IP identifier. + */ + + iph = (struct iphdr *)(skb->data + dev->hard_header_len); + th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2)); + size = skb->len - (((unsigned char *) th) - skb->data); + + /* + * Note: We ought to check for window limits here but + * currently this is done (less efficiently) elsewhere. + * We do need to check for a route change but can't handle + * that until we have the new 1.3.x buffers in. + * + */ + + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + /* + * This is not the right way to handle this. We have to + * issue an up to date window and ack report with this + * retransmit to keep the odd buggy tcp that relies on + * the fact BSD does this happy. + * We don't however need to recalculate the entire + * checksum, so someone wanting a small problem to play + * with might like to implement RFC1141/RFC1624 and speed + * this up by avoiding a full checksum. 
+ */ + + th->ack_seq = ntohl(sk->acked_seq); + th->window = ntohs(tcp_select_window(sk)); + tcp_send_check(th, sk->saddr, sk->daddr, size, sk); + + /* + * If the interface is (still) up and running, kick it. + */ + + if (dev->flags & IFF_UP) + { + /* + * If the packet is still being sent by the device/protocol + * below then don't retransmit. This is both needed, and good - + * especially with connected mode AX.25 where it stops resends + * occurring of an as yet unsent anyway frame! + * We still add up the counts as the round trip time wants + * adjusting. + * (sk has already been dereferenced above, so the sk test + * below is purely defensive.) + */ + if (sk && !skb_device_locked(skb)) + { + /* Remove it from any existing driver queue first! */ + skb_unlink(skb); + /* Now queue it */ + ip_statistics.IpOutRequests++; + dev_queue_xmit(skb, dev, sk->priority); + } + } + + /* + * Count retransmissions + */ + + ct++; + sk->prot->retransmits ++; + + /* + * Only one retransmit requested. + */ + + if (!all) + break; + + /* + * This should cut it off before we send too many packets. + */ + + if (ct >= sk->cong_window) + break; + skb = skb->link3; + } +} + +/* + * Reset the retransmission timer: note why in sk->ip_xmit_timeout and + * re-add sk->retransmit_timer with expires set to when. A when that is + * negative when cast to int is reported and replaced by 3. + */ + +static void reset_xmit_timer(struct sock *sk, int why, unsigned long when) +{ + del_timer(&sk->retransmit_timer); + sk->ip_xmit_timeout = why; + if((int)when < 0) + { + when=3; + printk("Error: Negative timer in xmit_timer\n"); + } + sk->retransmit_timer.expires=when; + add_timer(&sk->retransmit_timer); +} + +/* + * This is the normal code called for timeouts. It does the retransmission + * and then does backoff. tcp_do_retransmit is separated out because + * tcp_ack needs to send stuff from the retransmit queue without + * initiating a backoff. + */ + + +void tcp_retransmit_time(struct sock *sk, int all) +{ + tcp_do_retransmit(sk, all); + + /* + * Increase the timeout each time we retransmit. Note that + * we do not increase the rtt estimate. rto is initialized + * from rtt, but increases here. 
Jacobson (SIGCOMM 88) suggests + * that doubling rto each time is the least we can get away with. + * In KA9Q, Karn uses this for the first few times, and then + * goes to quadratic. netBSD doubles, but only goes up to *64, + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is + * defined in the protocol as the maximum possible RTT. I guess + * we'll have to use something other than TCP to talk to the + * University of Mars. + * + * PAWS allows us longer timeouts and large windows, so once + * implemented ftp to mars will work nicely. We will have to fix + * the 120 second clamps though! + */ + + sk->retransmits++; + sk->backoff++; + sk->rto = min(sk->rto << 1, 120*HZ); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); +} + + +/* + * A timer event has triggered a tcp retransmit timeout. The + * socket xmit queue is ready and set up to send. Because + * the ack receive code keeps the queue straight we do + * nothing clever here. + * + * When all is zero the congestion state is reset first (ssthresh to + * half of cong_window, cong_count to 0, cong_window to 1). + */ + +static void tcp_retransmit(struct sock *sk, int all) +{ + if (all) + { + tcp_retransmit_time(sk, all); + return; + } + + sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */ + /* sk->ssthresh in theory can be zero. I guess that's OK */ + sk->cong_count = 0; + + sk->cong_window = 1; + + /* Do the actual retransmit. */ + tcp_retransmit_time(sk, all); +} + +/* + * A write timeout has occurred. Process the after effects. + * Returns 0 if the socket was moved to TCP_CLOSE, 1 otherwise. + */ + +static int tcp_write_timeout(struct sock *sk) +{ + /* + * Look for a 'soft' timeout. + */ + if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) + || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) + { + /* + * Attempt to recover if arp has changed (unlikely!) or + * a route has shifted (not supported prior to 1.3). + */ + arp_destroy (sk->daddr, 0); + ip_route_check (sk->daddr); + } + /* + * Has it gone just too far ? 
+ */ + if (sk->retransmits > TCP_RETR2) + { + sk->err = ETIMEDOUT; + sk->error_report(sk); + del_timer(&sk->retransmit_timer); + /* + * Time wait the socket + */ + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) + { + tcp_set_state(sk,TCP_TIME_WAIT); + reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + } + else + { + /* + * Clean up time. + */ + tcp_set_state(sk, TCP_CLOSE); + return 0; + } + } + return 1; +} + +/* + * The TCP retransmit timer. This lacks a few small details. + * + * 1. An initial rtt timeout on the probe0 should cause what we can + * of the first write queue buffer to be split and sent. + * 2. On a 'major timeout' as defined by RFC1122 we shouldn't report + * ETIMEDOUT if we know an additional 'soft' error caused this. + * tcp_err should save a 'soft error' for us. + */ + +static void retransmit_timer(unsigned long data) +{ + struct sock *sk = (struct sock*)data; + int why = sk->ip_xmit_timeout; + + /* + * only process if socket is not in use (and in_bh is clear); + * otherwise re-add the timer and return + */ + + cli(); + if (sk->inuse || in_bh) + { + /* Try again in 1 second */ + sk->retransmit_timer.expires = HZ; + add_timer(&sk->retransmit_timer); + sti(); + return; + } + + sk->inuse = 1; + sti(); + + /* Always see if we need to send an ack. */ + + if (sk->ack_backlog && !sk->zapped) + { + sk->prot->read_wakeup (sk); + if (! sk->dead) + sk->data_ready(sk,0); + } + + /* Now we need to figure out why the socket was on the timer. */ + + switch (why) + { + /* Window probing */ + case TIME_PROBE0: + tcp_send_probe0(sk); + tcp_write_timeout(sk); + break; + /* Retransmitting */ + case TIME_WRITE: + /* It could be we got here because we needed to send an ack. + * So we need to check for that. + */ + { + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + skb = sk->send_head; + if (!skb) + { + restore_flags(flags); + } + else + { + /* + * Kicked by a delayed ack. 
Reset timer + * correctly now + * NOTE(review): the test below compares tick counts + * directly; confirm whether jiffies wraparound needs + * handling here. + */ + if (jiffies < skb->when + sk->rto) + { + reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies); + restore_flags(flags); + break; + } + restore_flags(flags); + /* + * Retransmission + */ + sk->prot->retransmit (sk, 0); + tcp_write_timeout(sk); + } + break; + } + /* Sending Keepalives */ + case TIME_KEEPOPEN: + /* + * this reset_xmit_timer() call is a hack, this is not + * how KEEPOPEN is supposed to work. + */ + reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + + /* Send something to keep the connection open. */ + if (sk->prot->write_wakeup) + sk->prot->write_wakeup (sk); + sk->retransmits++; + tcp_write_timeout(sk); + break; + default: + printk ("rexmit_timer: timer expired - reason unknown\n"); + break; + } + release_sock(sk); +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. After adjustment + * header points to the first 8 bytes of the tcp header. We need + * to find the appropriate port. + */ + +void tcp_err(int err, unsigned char *header, unsigned long daddr, + unsigned long saddr, struct inet_protocol *protocol) +{ + struct tcphdr *th; + struct sock *sk; + struct iphdr *iph=(struct iphdr *)header; + + header+=4*iph->ihl; + + + th =(struct tcphdr *)header; + sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr); + + if (sk == NULL) + return; + + if(err<0) + { + sk->err = -err; + sk->error_report(sk); + return; + } + + if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) + { + /* + * FIXME: + * For now we will just trigger a linear backoff. + * The slow start code should cause a real backoff here. 
+ */ + if (sk->cong_window > 4) + sk->cong_window--; + return; + } + +/* sk->err = icmp_err_convert[err & 0xff].errno; -- moved as TCP should hide non fatals internally (and does) */ + + /* + * If we've already connected we will keep trying + * until we time out, or the user gives up. + */ + + if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) + { + if (sk->state == TCP_SYN_SENT) + { + tcp_statistics.TcpAttemptFails++; + tcp_set_state(sk,TCP_CLOSE); + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + } + sk->err = icmp_err_convert[err & 0xff].error; + } + return; +} + + +/* + * Walk down the receive queue counting readable data until we hit the end or we find a gap + * in the received data queue (ie a frame missing that needs sending to us). Not + * sorting using two queues as data arrives makes life so much harder. + * + * Returns the count, or 0 if sk is NULL or the receive queue is empty. + */ + +#ifndef _HURD_ +static +#endif +int tcp_readable(struct sock *sk) +{ + unsigned long counted; + unsigned long amount; + struct sk_buff *skb; + int sum; + unsigned long flags; + + if(sk && sk->debug) + printk("tcp_readable: %p - ",sk); + + save_flags(flags); + cli(); + if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL) + { + restore_flags(flags); + if(sk && sk->debug) + printk("empty\n"); + return(0); + } + + counted = sk->copied_seq; /* Where we are at the moment */ + amount = 0; + + /* + * Do until a push or until we are out of data. + */ + + do + { + if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */ + break; + sum = skb->len -(counted - skb->h.th->seq); /* Length - header but start from where we are up to (avoid overlaps) */ + if (skb->h.th->syn) + sum++; + if (sum > 0) + { /* Add it up, move on */ + amount += sum; + if (skb->h.th->syn) + amount--; + counted += sum; + } + /* + * Don't count urg data ... but do it in the right place! 
+ * Consider: "old_data (ptr is here) URG PUSH data" + * The old code would stop at the first push because + * it counted the urg (amount==1) and then does amount-- + * *after* the loop. This means tcp_readable() always + * returned zero if any URG PUSH was in the queue, even + * though there was normal data available. If we subtract + * the urg data right here, we even get it to work for more + * than one URG PUSH skb without normal data. + * This means that select() finally works now with urg data + * in the queue. Note that rlogin was never affected + * because it doesn't use select(); it uses two processes + * and a blocking read(). And the queue scan in tcp_read() + * was correct. Mike <pall@rz.uni-karlsruhe.de> + */ + if (skb->h.th->urg) + amount--; /* don't count urg data */ + if (amount && skb->h.th->psh) break; + skb = skb->next; + } + while(skb != (struct sk_buff *)&sk->receive_queue); + + restore_flags(flags); + if(sk->debug) + printk("got %lu bytes.\n",amount); + return(amount); +} + +/* + * LISTEN is a special case for select.. + * For SEL_IN this returns whether tcp_find_established() found a socket, + * calling select_wait() on master_select_wakeup when it did not. Every + * other select type returns 0. + */ +static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait) +{ + if (sel_type == SEL_IN) { + int retval; + + sk->inuse = 1; + retval = (tcp_find_established(sk) != NULL); + release_sock(sk); + if (!retval) + select_wait(&master_select_wakeup,wait); + return retval; + } + return 0; +} + + +/* + * Wait for a TCP event. + * + * Note that we don't need to set "sk->inuse", as the upper select layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. 
+ */ +static int tcp_select(struct sock *sk, int sel_type, select_table *wait) +{ + if (sk->state == TCP_LISTEN) + return tcp_listen_select(sk, sel_type, wait); + + switch(sel_type) { + case SEL_IN: + if (sk->err) + return 1; + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + break; + + if (sk->shutdown & RCV_SHUTDOWN) + return 1; + + if (sk->acked_seq == sk->copied_seq) + break; + + if (sk->urg_seq != sk->copied_seq || + sk->acked_seq != sk->copied_seq+1 || + sk->urginline || !sk->urg_data) + return 1; + break; + + case SEL_OUT: + if (sk->shutdown & SEND_SHUTDOWN) + return 0; + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + break; + /* + * This is now right thanks to a small fix + * by Matt Dillon. + */ + + if (sk->prot->wspace(sk) < sk->mtu+128+sk->prot->max_header) + break; + return 1; + + case SEL_EX: + if (sk->err || sk->urg_data) + return 1; + break; + } + select_wait(sk->sleep, wait); + return 0; +} + +#ifndef _HURD_ +int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + int err; + switch(cmd) + { + + case TIOCINQ: +#ifdef FIXME /* FIXME: */ + case FIONREAD: +#endif + { + unsigned long amount; + + if (sk->state == TCP_LISTEN) + return(-EINVAL); + + sk->inuse = 1; + amount = tcp_readable(sk); + release_sock(sk); + err=verify_area(VERIFY_WRITE,(void *)arg, + sizeof(unsigned long)); + if(err) + return err; + put_fs_long(amount,(unsigned long *)arg); + return(0); + } + case SIOCATMARK: + { + int answ = sk->urg_data && sk->urg_seq == sk->copied_seq; + + err = verify_area(VERIFY_WRITE,(void *) arg, + sizeof(unsigned long)); + if (err) + return err; + put_fs_long(answ,(int *) arg); + return(0); + } + case TIOCOUTQ: + { + unsigned long amount; + + if (sk->state == TCP_LISTEN) return(-EINVAL); + amount = sk->prot->wspace(sk); + err=verify_area(VERIFY_WRITE,(void *)arg, + sizeof(unsigned long)); + if(err) + return err; + put_fs_long(amount,(unsigned long *)arg); + return(0); + } + default: + return(-EINVAL); + } +} +#endif + + + 
+void tcp_send_check(struct tcphdr *th, unsigned long saddr, + unsigned long daddr, int len, struct sock *sk) +{ + th->check = 0; + th->check = tcp_check(th, len, saddr, daddr); + return; +} + +/* + * This is the main buffer sending routine. We queue the buffer + * having checked it is sane seeming. + */ + +static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) +{ + int size; + struct tcphdr * th = skb->h.th; + + /* + * length of packet (not counting length of pre-tcp headers) + */ + + size = skb->len - ((unsigned char *) th - skb->data); + + /* + * Sanity check it.. + */ + + if (size < sizeof(struct tcphdr) || size > skb->len) + { + printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", + skb, skb->data, th, skb->len); + kfree_skb(skb, FREE_WRITE); + return; + } + + /* + * If we have queued a header size packet.. (these crash a few + * tcp stacks if ack is not set) + */ + + if (size == sizeof(struct tcphdr)) + { + /* If it's got a syn or fin it's notionally included in the size..*/ + if(!th->syn && !th->fin) + { + printk("tcp_send_skb: attempt to queue a bogon.\n"); + kfree_skb(skb,FREE_WRITE); + return; + } + } + + /* + * Actual processing. + */ + + tcp_statistics.TcpOutSegs++; + skb->h.seq = ntohl(th->seq) + size - 4*th->doff; + + /* + * We must queue if + * + * a) The right edge of this frame exceeds the window + * b) We are retransmitting (Nagle's rule) + * c) We have too many packets 'in flight' + */ + + if (after(skb->h.seq, sk->window_seq) || + (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) || + sk->packets_out >= sk->cong_window) + { + /* checksum will be supplied by tcp_write_xmit. So + * we shouldn't need to set it at all. I'm being paranoid */ + th->check = 0; + if (skb->next != NULL) + { + printk("tcp_send_partial: next != NULL\n"); + skb_unlink(skb); + } + skb_queue_tail(&sk->write_queue, skb); + + /* + * If we don't fit we have to start the zero window + * probes. 
This is broken - we really need to do a partial + * send _first_ (This is what causes the Cisco and PC/TCP + * grief). + */ + + if (before(sk->window_seq, sk->write_queue.next->h.seq) && + sk->send_head == NULL && sk->ack_backlog == 0) + reset_xmit_timer(sk, TIME_PROBE0, sk->rto); + } + else + { + /* + * This is going straight out + */ + + th->ack_seq = ntohl(sk->acked_seq); + th->window = ntohs(tcp_select_window(sk)); + + tcp_send_check(th, sk->saddr, sk->daddr, size, sk); + + sk->sent_seq = sk->write_seq; + + /* + * This is mad. The tcp retransmit queue is put together + * by the ip layer. This causes half the problems with + * unroutable FIN's and other things. + */ + + sk->prot->queue_xmit(sk, skb->dev, skb, 0); + + /* + * Set for next retransmit based on expected ACK time. + * FIXME: We set this every time which means our + * retransmits are really about a window behind. + */ + + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + } +} + +/* + * Locking problems lead us to a messy situation where we can have + * multiple partially complete buffers queued up. This is really bad + * as we don't want to be sending partial buffers. Fix this with + * a semaphore or similar to lock tcp_write per socket. + * + * These routines are pretty self descriptive. 
+ */ + +struct sk_buff * tcp_dequeue_partial(struct sock * sk) +{ + struct sk_buff * skb; + unsigned long flags; + + save_flags(flags); + cli(); + skb = sk->partial; + if (skb) { + sk->partial = NULL; + del_timer(&sk->partial_timer); + } + restore_flags(flags); + return skb; +} + +/* + * Empty the partial queue + */ + +static void tcp_send_partial(struct sock *sk) +{ + struct sk_buff *skb; + + if (sk == NULL) + return; + while ((skb = tcp_dequeue_partial(sk)) != NULL) + tcp_send_skb(sk, skb); +} + +/* + * Queue a partial frame + */ + +void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk) +{ + struct sk_buff * tmp; + unsigned long flags; + + save_flags(flags); + cli(); + tmp = sk->partial; + if (tmp) + del_timer(&sk->partial_timer); + sk->partial = skb; + init_timer(&sk->partial_timer); + /* + * Wait up to 1 second for the buffer to fill. + */ + sk->partial_timer.expires = HZ; + sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial; + sk->partial_timer.data = (unsigned long) sk; + add_timer(&sk->partial_timer); + restore_flags(flags); + if (tmp) + tcp_send_skb(sk, tmp); +} + + +/* + * This routine sends an ack and also updates the window. + */ + +static void tcp_send_ack(unsigned long sequence, unsigned long ack, + struct sock *sk, + struct tcphdr *th, unsigned long daddr) +{ + struct sk_buff *buff; + struct tcphdr *t1; + struct device *dev = NULL; + int tmp; + + if(sk->zapped) + return; /* We have been reset, we may not send again */ + + /* + * We need to grab some memory, and put together an ack, + * and then put it into the queue to be sent. + */ + + buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC); + if (buff == NULL) + { + /* + * Force it to send an ack. We don't have to do this + * (ACK is unreliable) but it's much better use of + * bandwidth on slow links to send a spare ack than + * resend packets. 
+ */ + + sk->ack_backlog++; + if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) + { + reset_xmit_timer(sk, TIME_WRITE, HZ); + } + return; + } + + /* + * Assemble a suitable TCP frame + */ + + buff->len = sizeof(struct tcphdr); + buff->sk = sk; + buff->localroute = sk->localroute; + t1 =(struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev, + IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + buff->free = 1; + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + return; + } + buff->len += tmp; + t1 =(struct tcphdr *)((char *)t1 +tmp); + + memcpy(t1, th, sizeof(*t1)); + + /* + * Swap the send and the receive. + */ + + t1->dest = th->source; + t1->source = th->dest; + t1->seq = ntohl(sequence); + t1->ack = 1; + sk->window = tcp_select_window(sk); + t1->window = ntohs(sk->window); + t1->res1 = 0; + t1->res2 = 0; + t1->rst = 0; + t1->urg = 0; + t1->syn = 0; + t1->psh = 0; + t1->fin = 0; + + /* + * If we have nothing queued for transmit and the transmit timer + * is on we are just doing an ACK timeout and need to switch + * to a keepalive. + */ + + if (ack == sk->acked_seq) + { + sk->ack_backlog = 0; + sk->bytes_rcv = 0; + sk->ack_timed = 0; + if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL + && sk->ip_xmit_timeout == TIME_WRITE) + { + if(sk->keepopen) { + reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN); + } else { + delete_timer(sk); + } + } + } + + /* + * Fill in the packet and send it + */ + + t1->ack_seq = ntohl(ack); + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk); + if (sk->debug) + printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack); + tcp_statistics.TcpOutSegs++; + sk->prot->queue_xmit(sk, dev, buff, 1); +} + + +/* + * This routine builds a generic TCP header. 
+ */ + +extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) +{ + + memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); + th->seq = htonl(sk->write_seq); + th->psh =(push == 0) ? 1 : 0; + th->doff = sizeof(*th)/4; + th->ack = 1; + th->fin = 0; + sk->ack_backlog = 0; + sk->bytes_rcv = 0; + sk->ack_timed = 0; + th->ack_seq = htonl(sk->acked_seq); + sk->window = tcp_select_window(sk); + th->window = htons(sk->window); + + return(sizeof(*th)); +} + +/* + * This routine copies from a user buffer into a socket, + * and starts the transmit system. + */ + +static int tcp_write(struct sock *sk, unsigned char *from, + int len, int nonblock, unsigned flags) +{ + int copied = 0; + int copy; + int tmp; + struct sk_buff *skb; + struct sk_buff *send_tmp; + unsigned char *buff; + struct proto *prot; + struct device *dev = NULL; + + sk->inuse=1; + prot = sk->prot; + while(len > 0) + { + if (sk->err) + { /* Stop on an error */ + release_sock(sk); + if (copied) + return(copied); + tmp = -sk->err; + sk->err = 0; + return(tmp); + } + + /* + * First thing we do is make sure that we are established. + */ + + if (sk->shutdown & SEND_SHUTDOWN) + { + release_sock(sk); + sk->err = EPIPE; + if (copied) + return(copied); + sk->err = 0; + return(-EPIPE); + } + + /* + * Wait for a connection to finish. 
+ */ + + while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) + { + if (sk->err) + { + release_sock(sk); + if (copied) + return(copied); + tmp = -sk->err; + sk->err = 0; + return(tmp); + } + + if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) + { + release_sock(sk); + if (copied) + return(copied); + + if (sk->err) + { + tmp = -sk->err; + sk->err = 0; + return(tmp); + } + + if (sk->keepopen) + { + send_sig(SIGPIPE, current, 0); + } + return(-EPIPE); + } + + if (nonblock || copied) + { + release_sock(sk); + if (copied) + return(copied); + return(-EAGAIN); + } + + release_sock(sk); + cli(); + + if (sk->state != TCP_ESTABLISHED && + sk->state != TCP_CLOSE_WAIT && sk->err == 0) + { + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + if (copied) + return(copied); + return(-ERESTARTSYS); + } + } + sk->inuse = 1; + sti(); + } + + /* + * The following code can result in copy <= if sk->mss is ever + * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window). + * sk->mtu is constant once SYN processing is finished. I.e. we + * had better not get here until we've seen his SYN and at least one + * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.) + * But ESTABLISHED should guarantee that. sk->max_window is by definition + * non-decreasing. Note that any ioctl to set user_mss must be done + * before the exchange of SYN's. If the initial ack from the other + * end has a window of 0, max_window and thus mss will both be 0. + */ + + /* + * Now we need to check if we have a half built packet. + */ + + if ((skb = tcp_dequeue_partial(sk)) != NULL) + { + int hdrlen; + + /* IP header + TCP header */ + hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data) + + sizeof(struct tcphdr); + + /* Add more stuff to the end of skb->len */ + if (!(flags & MSG_OOB)) + { + copy = min(sk->mss - (skb->len - hdrlen), len); + /* FIXME: this is really a bug. 
*/ + if (copy <= 0) + { + printk("TCP: **bug**: \"copy\" <= 0!!\n"); + copy = 0; + } + + memcpy_fromfs(skb->data + skb->len, from, copy); + skb->len += copy; + from += copy; + copied += copy; + len -= copy; + sk->write_seq += copy; + } + if ((skb->len - hdrlen) >= sk->mss || + (flags & MSG_OOB) || !sk->packets_out) + tcp_send_skb(sk, skb); + else + tcp_enqueue_partial(skb, sk); + continue; + } + + /* + * We also need to worry about the window. + * If window < 1/2 the maximum window we've seen from this + * host, don't use it. This is sender side + * silly window prevention, as specified in RFC1122. + * (Note that this is different than earlier versions of + * SWS prevention, e.g. RFC813.). What we actually do is + * use the whole MSS. Since the results in the right + * edge of the packet being outside the window, it will + * be queued for later rather than sent. + */ + + copy = sk->window_seq - sk->write_seq; + if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss) + copy = sk->mss; + if (copy > len) + copy = len; + + /* + * We should really check the window here also. + */ + + send_tmp = NULL; + if (copy < sk->mss && !(flags & MSG_OOB)) + { + /* + * We will release the socket in case we sleep here. + */ + release_sock(sk); + /* + * NB: following must be mtu, because mss can be increased. + * mss is always <= mtu + */ + skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL); + sk->inuse = 1; + send_tmp = skb; + } + else + { + /* + * We will release the socket in case we sleep here. + */ + release_sock(sk); + skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL); + sk->inuse = 1; + } + + /* + * If we didn't get any memory, we need to sleep. + */ + + if (skb == NULL) + { + sk->socket->flags |= SO_NOSPACE; + if (nonblock) + { + release_sock(sk); + if (copied) + return(copied); + return(-EAGAIN); + } + + /* + * FIXME: here is another race condition. 
+ */ + + tmp = sk->wmem_alloc; + release_sock(sk); + cli(); + /* + * Again we will try to avoid it. + */ + if (tmp <= sk->wmem_alloc && + (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT) + && sk->err == 0) + { + sk->socket->flags &= ~SO_NOSPACE; + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + if (copied) + return(copied); + return(-ERESTARTSYS); + } + } + sk->inuse = 1; + sti(); + continue; + } + + skb->len = 0; + skb->sk = sk; + skb->free = 0; + skb->localroute = sk->localroute|(flags&MSG_DONTROUTE); + + buff = skb->data; + + /* + * FIXME: we need to optimize this. + * Perhaps some hints here would be good. + */ + + tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl); + if (tmp < 0 ) + { + prot->wfree(sk, skb->mem_addr, skb->mem_len); + release_sock(sk); + if (copied) + return(copied); + return(tmp); + } + skb->len += tmp; + skb->dev = dev; + buff += tmp; + skb->h.th =(struct tcphdr *) buff; + tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy); + if (tmp < 0) + { + prot->wfree(sk, skb->mem_addr, skb->mem_len); + release_sock(sk); + if (copied) + return(copied); + return(tmp); + } + + if (flags & MSG_OOB) + { + ((struct tcphdr *)buff)->urg = 1; + ((struct tcphdr *)buff)->urg_ptr = ntohs(copy); + } + skb->len += tmp; + memcpy_fromfs(buff+tmp, from, copy); + + from += copy; + copied += copy; + len -= copy; + skb->len += copy; + skb->free = 0; + sk->write_seq += copy; + + if (send_tmp != NULL && sk->packets_out) + { + tcp_enqueue_partial(send_tmp, sk); + continue; + } + tcp_send_skb(sk, skb); + } + sk->err = 0; + +/* + * Nagle's rule. Turn Nagle off with TCP_NODELAY for highly + * interactive fast network servers. 
It's meant to be on and + * it really improves the throughput though not the echo time + * on my slow slip link - Alan + */ + +/* + * Avoid possible race on send_tmp - c/o Johannes Stille + */ + + if(sk->partial && ((!sk->packets_out) + /* If not nagling we can send on the before case too.. */ + || (sk->nonagle && before(sk->write_seq , sk->window_seq)) + )) + tcp_send_partial(sk); + + release_sock(sk); + return(copied); +} + +/* + * This is just a wrapper. + */ + +static int tcp_sendto(struct sock *sk, unsigned char *from, + int len, int nonblock, unsigned flags, + struct sockaddr_in *addr, int addr_len) +{ + if (flags & ~(MSG_OOB|MSG_DONTROUTE)) + return -EINVAL; + if (sk->state == TCP_CLOSE) + return -ENOTCONN; + if (addr_len < sizeof(*addr)) + return -EINVAL; + if (addr->sin_family && addr->sin_family != AF_INET) + return -EINVAL; + if (addr->sin_port != sk->dummy_th.dest) + return -EISCONN; + if (addr->sin_addr.s_addr != sk->daddr) + return -EISCONN; + return tcp_write(sk, from, len, nonblock, flags); +} + + +/* + * Send an ack if one is backlogged at this point. Ought to merge + * this with tcp_send_ack(). + */ + +static void tcp_read_wakeup(struct sock *sk) +{ + int tmp; + struct device *dev = NULL; + struct tcphdr *t1; + struct sk_buff *buff; + + if (!sk->ack_backlog) + return; + + /* + * FIXME: we need to put code here to prevent this routine from + * being called. Being called once in a while is ok, so only check + * if this is the second time in a row. + */ + + /* + * We need to grab some memory, and put together an ack, + * and then put it into the queue to be sent. + */ + + buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); + if (buff == NULL) + { + /* Try again real soon. */ + reset_xmit_timer(sk, TIME_WRITE, HZ); + return; + } + + buff->len = sizeof(struct tcphdr); + buff->sk = sk; + buff->localroute = sk->localroute; + + /* + * Put in the IP header and routing stuff. 
+ */ + + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + buff->free = 1; + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + return; + } + + buff->len += tmp; + t1 =(struct tcphdr *)(buff->data +tmp); + + memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); + t1->seq = htonl(sk->sent_seq); + t1->ack = 1; + t1->res1 = 0; + t1->res2 = 0; + t1->rst = 0; + t1->urg = 0; + t1->syn = 0; + t1->psh = 0; + sk->ack_backlog = 0; + sk->bytes_rcv = 0; + sk->window = tcp_select_window(sk); + t1->window = ntohs(sk->window); + t1->ack_seq = ntohl(sk->acked_seq); + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); + sk->prot->queue_xmit(sk, dev, buff, 1); + tcp_statistics.TcpOutSegs++; +} + + +/* + * FIXME: + * This routine frees used buffers. + * It should consider sending an ACK to let the + * other end know we now have a bigger window. + */ + +static void cleanup_rbuf(struct sock *sk) +{ + unsigned long flags; + unsigned long left; + struct sk_buff *skb; + unsigned long rspace; + + if(sk->debug) + printk("cleaning rbuf for sk=%p\n", sk); + + save_flags(flags); + cli(); + + left = sk->prot->rspace(sk); + + /* + * We have to loop through all the buffer headers, + * and try to free up all the space we can. + */ + + while((skb=skb_peek(&sk->receive_queue)) != NULL) + { + if (!skb->used || skb->users) + break; + skb_unlink(skb); + skb->sk = sk; + kfree_skb(skb, FREE_READ); + } + + restore_flags(flags); + + /* + * FIXME: + * At this point we should send an ack if the difference + * in the window, and the amount of space is bigger than + * TCP_WINDOW_DIFF. + */ + + if(sk->debug) + printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk), + left); + if ((rspace=sk->prot->rspace(sk)) != left) + { + /* + * This area has caused the most trouble. 
The current strategy + * is to simply do nothing if the other end has room to send at + * least 3 full packets, because the ack from those will auto- + * matically update the window. If the other end doesn't think + * we have much space left, but we have room for at least 1 more + * complete packet than it thinks we do, we will send an ack + * immediately. Otherwise we will wait up to .5 seconds in case + * the user reads some more. + */ + sk->ack_backlog++; + /* + * It's unclear whether to use sk->mtu or sk->mss here. They differ only + * if the other end is offering a window smaller than the agreed on MSS + * (called sk->mtu here). In theory there's no connection between send + * and receive, and so no reason to think that they're going to send + * small packets. For the moment I'm using the hack of reducing the mss + * only on the send side, so I'm putting mtu here. + */ + + if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) + { + /* Send an ack right now. */ + tcp_read_wakeup(sk); + } + else + { + /* Force it to send an ack soon. */ + int was_active = del_timer(&sk->retransmit_timer); + if (!was_active || TCP_ACK_TIME < sk->timer.expires) + { + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); + } + else + add_timer(&sk->retransmit_timer); + } + } +} + + +/* + * Handle reading urgent data. BSD has very simple semantics for + * this, no blocking and very strange errors 8) + */ + +static int tcp_read_urg(struct sock * sk, int nonblock, + unsigned char *to, int len, unsigned flags) +{ + /* + * No URG data to read + */ + if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ) + return -EINVAL; /* Yes this is right ! 
*/ + + if (sk->err) + { + int tmp = -sk->err; + sk->err = 0; + return tmp; + } + + if (sk->state == TCP_CLOSE || sk->done) + { + if (!sk->done) { + sk->done = 1; + return 0; + } + return -ENOTCONN; + } + + if (sk->shutdown & RCV_SHUTDOWN) + { + sk->done = 1; + return 0; + } + sk->inuse = 1; + if (sk->urg_data & URG_VALID) + { + char c = sk->urg_data; + if (!(flags & MSG_PEEK)) + sk->urg_data = URG_READ; + put_fs_byte(c, to); + release_sock(sk); + return 1; + } + release_sock(sk); + + /* + * Fixed the recv(..., MSG_OOB) behaviour. BSD docs and + * the available implementations agree in this case: + * this call should never block, independent of the + * blocking state of the socket. + * Mike <pall@rz.uni-karlsruhe.de> + */ + return -EAGAIN; +} + + +/* + * This routine copies from a sock struct into the user buffer. + */ + +static int tcp_read(struct sock *sk, unsigned char *to, + int len, int nonblock, unsigned flags) +{ +#ifndef _HURD_ + struct wait_queue wait = { current, NULL }; +#endif + int copied = 0; + unsigned long peek_seq; + volatile unsigned long *seq; /* So gcc doesn't overoptimise */ + unsigned long used; + + /* + * This error should be checked. + */ + + if (sk->state == TCP_LISTEN) + return -ENOTCONN; + + /* + * Urgent data needs to be handled specially. + */ + + if (flags & MSG_OOB) + return tcp_read_urg(sk, nonblock, to, len, flags); + + /* + * Copying sequence to update. This is volatile to handle + * the multi-reader case neatly (memcpy_to/fromfs might be + * inline and thus not flush cached variables otherwise). + */ + + peek_seq = sk->copied_seq; + seq = &sk->copied_seq; + if (flags & MSG_PEEK) + seq = &peek_seq; + +#ifndef _HURD_ + add_wait_queue(sk->sleep, &wait); +#endif + sk->inuse = 1; + while (len > 0) + { + struct sk_buff * skb; + unsigned long offset; + + /* + * Are we at urgent data? Stop if we have read anything. + */ + + if (copied && sk->urg_data && sk->urg_seq == *seq) + break; + + /* + * Next get a buffer. 
+ */ + +#ifndef _HURD_ + current->state = TASK_INTERRUPTIBLE; +#endif + + skb = skb_peek(&sk->receive_queue); + do + { + if (!skb) + break; + if (before(*seq, skb->h.th->seq)) + break; + offset = *seq - skb->h.th->seq; + if (skb->h.th->syn) + offset--; + if (offset < skb->len) + goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; + if (!(flags & MSG_PEEK)) + skb->used = 1; + skb = skb->next; + } + while (skb != (struct sk_buff *)&sk->receive_queue); + + if (copied) + break; + + if (sk->err) + { + copied = -sk->err; + sk->err = 0; + break; + } + + if (sk->state == TCP_CLOSE) + { + if (!sk->done) + { + sk->done = 1; + break; + } + copied = -ENOTCONN; + break; + } + + if (sk->shutdown & RCV_SHUTDOWN) + { + sk->done = 1; + break; + } + + if (nonblock) + { + copied = -EAGAIN; + break; + } + + cleanup_rbuf(sk); + release_sock(sk); + sk->socket->flags |= SO_WAITDATA; +#ifdef _HURD_ + interruptible_sleep_on (sk->sleep); +#else + schedule(); +#endif + sk->socket->flags &= ~SO_WAITDATA; + sk->inuse = 1; + + if (current->signal & ~current->blocked) + { + copied = -ERESTARTSYS; + break; + } + continue; + + found_ok_skb: + /* + * Lock the buffer. We can be fairly relaxed as + * an interrupt will never steal a buffer we are + * using unless I've missed something serious in + * tcp_data. + */ + + skb->users++; + + /* + * Ok so how much can we use ? + */ + + used = skb->len - offset; + if (len < used) + used = len; + /* + * Do we have urgent data here? + */ + + if (sk->urg_data) + { + unsigned long urg_offset = sk->urg_seq - *seq; + if (urg_offset < used) + { + if (!urg_offset) + { + if (!sk->urginline) + { + ++*seq; + offset++; + used--; + } + } + else + used = urg_offset; + } + } + + /* + * Copy it - We _MUST_ update *seq first so that we + * don't ever double read when we have dual readers + */ + + *seq += used; + + /* + * This memcpy_tofs can sleep. 
If it sleeps and we + * do a second read it relies on the skb->users to avoid + * a crash when cleanup_rbuf() gets called. + */ + + memcpy_tofs(to,((unsigned char *)skb->h.th) + + skb->h.th->doff*4 + offset, used); + copied += used; + len -= used; + to += used; + + /* + * We now will not sleep again until we are finished + * with skb. Sorry if you are doing the SMP port + * but you'll just have to fix it neatly ;) + */ + + skb->users --; + + if (after(sk->copied_seq,sk->urg_seq)) + sk->urg_data = 0; + if (used + offset < skb->len) + continue; + + /* + * Process the FIN. + */ + + if (skb->h.th->fin) + goto found_fin_ok; + if (flags & MSG_PEEK) + continue; + skb->used = 1; + continue; + + found_fin_ok: + ++*seq; + if (flags & MSG_PEEK) + break; + + /* + * All is done + */ + + skb->used = 1; + sk->shutdown |= RCV_SHUTDOWN; + break; + + } +#ifndef _HURD_ + remove_wait_queue(sk->sleep, &wait); + current->state = TASK_RUNNING; +#endif + + /* Clean up data we have read: This will do ACK frames */ + cleanup_rbuf(sk); + release_sock(sk); + return copied; +} + +/* + * State processing on a close. This implements the state shift for + * sending our FIN frame. Note that we only send a FIN for some + * states. A shutdown() may have already sent the FIN, or we may be + * closed. + */ + +static int tcp_close_state(struct sock *sk, int dead) +{ + int ns=TCP_CLOSE; + int send_fin=0; + switch(sk->state) + { + case TCP_SYN_SENT: /* No SYN back, no FIN needed */ + break; + case TCP_SYN_RECV: + case TCP_ESTABLISHED: /* Closedown begin */ + ns=TCP_FIN_WAIT1; + send_fin=1; + break; + case TCP_FIN_WAIT1: /* Already closing, or FIN sent: no change */ + case TCP_FIN_WAIT2: + case TCP_CLOSING: + ns=sk->state; + break; + case TCP_CLOSE: + case TCP_LISTEN: + break; + case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and + wait only for the ACK */ + ns=TCP_LAST_ACK; + send_fin=1; + } + + tcp_set_state(sk,ns); + + /* + * This is a (useful) BSD violating of the RFC. 
There is a + * problem with TCP as specified in that the other end could + * keep a socket open forever with no application left this end. + * We use a 3 minute timeout (about the same as BSD) then kill + * our end. If they send after that then tough - BUT: long enough + * that we won't make the old 4*rto = almost no time - whoops + * reset mistake. + */ + if(dead && ns==TCP_FIN_WAIT2) + { + int timer_active=del_timer(&sk->timer); + if(timer_active) + add_timer(&sk->timer); + else + reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT); + } + + return send_fin; +} + +/* + * Send a fin. + */ + +static void tcp_send_fin(struct sock *sk) +{ + struct proto *prot =(struct proto *)sk->prot; + struct tcphdr *th =(struct tcphdr *)&sk->dummy_th; + struct tcphdr *t1; + struct sk_buff *buff; + struct device *dev=NULL; + int tmp; + + release_sock(sk); /* in case the malloc sleeps. */ + + buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); + sk->inuse = 1; + + if (buff == NULL) + { + /* This is a disaster if it occurs */ + printk("tcp_send_fin: Impossible malloc failure"); + return; + } + + /* + * Administrivia + */ + + buff->sk = sk; + buff->len = sizeof(*t1); + buff->localroute = sk->localroute; + t1 =(struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, + sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + int t; + /* + * Finish anyway, treat this as a send that got lost. + * (Not good). + */ + + buff->free = 1; + prot->wfree(sk,buff->mem_addr, buff->mem_len); + sk->write_seq++; + t=del_timer(&sk->timer); + if(t) + add_timer(&sk->timer); + else + reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + return; + } + + /* + * We ought to check if the end of the queue is a buffer and + * if so simply add the fin to that buffer, not send it ahead. 
+ */ + + t1 =(struct tcphdr *)((char *)t1 +tmp); + buff->len += tmp; + buff->dev = dev; + memcpy(t1, th, sizeof(*t1)); + t1->seq = ntohl(sk->write_seq); + sk->write_seq++; + buff->h.seq = sk->write_seq; + t1->ack = 1; + t1->ack_seq = ntohl(sk->acked_seq); + t1->window = ntohs(sk->window=tcp_select_window(sk)); + t1->fin = 1; + t1->rst = 0; + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); + + /* + * If there is data in the write queue, the fin must be appended to + * the write queue. + */ + + if (skb_peek(&sk->write_queue) != NULL) + { + buff->free = 0; + if (buff->next != NULL) + { + printk("tcp_send_fin: next != NULL\n"); + skb_unlink(buff); + } + skb_queue_tail(&sk->write_queue, buff); + } + else + { + sk->sent_seq = sk->write_seq; + sk->prot->queue_xmit(sk, dev, buff, 0); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + } +} + +/* + * Shutdown the sending side of a connection. Much like close except + * that we don't receive shut down or set sk->dead=1. + */ + +void tcp_shutdown(struct sock *sk, int how) +{ + /* + * We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. + * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. + */ + + if (!(how & SEND_SHUTDOWN)) + return; + + /* + * If we've already sent a FIN, or it's a closed state + */ + + if (sk->state == TCP_FIN_WAIT1 || + sk->state == TCP_FIN_WAIT2 || + sk->state == TCP_CLOSING || + sk->state == TCP_LAST_ACK || + sk->state == TCP_TIME_WAIT || + sk->state == TCP_CLOSE || + sk->state == TCP_LISTEN + ) + { + return; + } + sk->inuse = 1; + + /* + * flag that the sender has shutdown + */ + + sk->shutdown |= SEND_SHUTDOWN; + + /* + * Clear out any half completed packets. 
+ */ + + if (sk->partial) + tcp_send_partial(sk); + + /* + * FIN if needed + */ + + if(tcp_close_state(sk,0)) + tcp_send_fin(sk); + + release_sock(sk); +} + + +static int +tcp_recvfrom(struct sock *sk, unsigned char *to, + int to_len, int nonblock, unsigned flags, + struct sockaddr_in *addr, int *addr_len) +{ + int result; + + /* + * Have to check these first unlike the old code. If + * we check them after we lose data on an error + * which is wrong + */ + + if(addr_len) + *addr_len = sizeof(*addr); + result=tcp_read(sk, to, to_len, nonblock, flags); + + if (result < 0) + return(result); + + if(addr) + { + addr->sin_family = AF_INET; + addr->sin_port = sk->dummy_th.dest; + addr->sin_addr.s_addr = sk->daddr; + } + return(result); +} + + +/* + * This routine will send an RST to the other tcp. + */ + +static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th, + struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl) +{ + struct sk_buff *buff; + struct tcphdr *t1; + int tmp; + struct device *ndev=NULL; + + /* + * Cannot reset a reset (Think about it). + */ + + if(th->rst) + return; + + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. + */ + + buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC); + if (buff == NULL) + return; + + buff->len = sizeof(*t1); + buff->sk = NULL; + buff->dev = dev; + buff->localroute = 0; + + t1 =(struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt, + sizeof(struct tcphdr),tos,ttl); + if (tmp < 0) + { + buff->free = 1; + prot->wfree(NULL, buff->mem_addr, buff->mem_len); + return; + } + + t1 =(struct tcphdr *)((char *)t1 +tmp); + buff->len += tmp; + memcpy(t1, th, sizeof(*t1)); + + /* + * Swap the send and the receive. 
+ */ + + t1->dest = th->source; + t1->source = th->dest; + t1->rst = 1; + t1->window = 0; + + if(th->ack) + { + t1->ack = 0; + t1->seq = th->ack_seq; + t1->ack_seq = 0; + } + else + { + t1->ack = 1; + if(!th->syn) + t1->ack_seq=htonl(th->seq); + else + t1->ack_seq=htonl(th->seq+1); + t1->seq=0; + } + + t1->syn = 0; + t1->urg = 0; + t1->fin = 0; + t1->psh = 0; + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL); + prot->queue_xmit(NULL, ndev, buff, 1); + tcp_statistics.TcpOutSegs++; +} + + +/* + * Look for tcp options. Parses everything but only knows about MSS. + * This routine is always called with the packet containing the SYN. + * However it may also be called with the ack to the SYN. So you + * can't assume this is always the SYN. It's always called after + * we have set up sk->mtu to our own MTU. + * + * We need at minimum to add PAWS support here. Possibly large windows + * as Linux gets deployed on 100Mb/sec networks. + */ + +static void tcp_options(struct sock *sk, struct tcphdr *th) +{ + unsigned char *ptr; + int length=(th->doff*4)-sizeof(struct tcphdr); + int mss_seen = 0; + + ptr = (unsigned char *)(th + 1); + + while(length>0) + { + int opcode=*ptr++; + int opsize=*ptr++; + switch(opcode) + { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + ptr--; /* the opsize=*ptr++ above was a mistake */ + continue; + + default: + if(opsize<=2) /* Avoid silly options looping forever */ + return; + switch(opcode) + { + case TCPOPT_MSS: + if(opsize==4 && th->syn) + { + sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); + mss_seen = 1; + } + break; + /* Add other options here as people feel the urge to implement stuff like large windows */ + } + ptr+=opsize-2; + length-=opsize; + } + } + if (th->syn) + { + if (! 
mss_seen) + sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */ + } +#ifdef CONFIG_INET_PCTCP + sk->mss = min(sk->max_window >> 1, sk->mtu); +#else + sk->mss = min(sk->max_window, sk->mtu); +#endif +} + +static inline unsigned long default_mask(unsigned long dst) +{ + dst = ntohl(dst); + if (IN_CLASSA(dst)) + return htonl(IN_CLASSA_NET); + if (IN_CLASSB(dst)) + return htonl(IN_CLASSB_NET); + return htonl(IN_CLASSC_NET); +} + +/* + * Default sequence number picking algorithm. + * As close as possible to RFC 793, which + * suggests using a 250kHz clock. + * Further reading shows this assumes 2MB/s networks. + * For 10MB/s ethernet, a 1MHz clock is appropriate. + * That's funny, Linux has one built in! Use it! + */ + +extern inline unsigned long tcp_init_seq(void) +{ + struct timeval tv; + do_gettimeofday(&tv); + return tv.tv_usec+tv.tv_sec*1000000; +} + +/* + * This routine handles a connection request. + * It should make sure we haven't already responded. + * Because of the way BSD works, we have to send a syn/ack now. + * This also means it will be harder to close a socket which is + * listening. + */ + +static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, + unsigned long daddr, unsigned long saddr, + struct options *opt, struct device *dev, unsigned long seq) +{ + struct sk_buff *buff; + struct tcphdr *t1; + unsigned char *ptr; + struct sock *newsk; + struct tcphdr *th; + struct device *ndev=NULL; + int tmp; + struct rtable *rt; + + th = skb->h.th; + + /* If the socket is dead, don't accept the connection. */ + if (!sk->dead) + { + sk->data_ready(sk,0); + } + else + { + if(sk->debug) + printk("Reset on %p: Connect on dead socket.\n",sk); + tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl); + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } + + /* + * Make sure we can accept more. This will prevent a + * flurry of syns from eating up all our memory. 
+ */ + + if (sk->ack_backlog >= sk->max_ack_backlog) + { + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } + + /* + * We need to build a new sock struct. + * It is sort of bad to have a socket without an inode attached + * to it, but the wake_up's will just wake up the listening socket, + * and if the listening socket is destroyed before this is taken + * off of the queue, this will take care of it. + */ + + newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); + if (newsk == NULL) + { + /* just ignore the syn. It will get retransmitted. */ + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } + + memcpy(newsk, sk, sizeof(*newsk)); + skb_queue_head_init(&newsk->write_queue); + skb_queue_head_init(&newsk->receive_queue); + newsk->send_head = NULL; + newsk->send_tail = NULL; + skb_queue_head_init(&newsk->back_log); + newsk->rtt = 0; /*TCP_CONNECT_TIME<<3*/ + newsk->rto = TCP_TIMEOUT_INIT; + newsk->mdev = 0; + newsk->max_window = 0; + newsk->cong_window = 1; + newsk->cong_count = 0; + newsk->ssthresh = 0; + newsk->backoff = 0; + newsk->blog = 0; + newsk->intr = 0; + newsk->proc = 0; + newsk->done = 0; + newsk->partial = NULL; + newsk->pair = NULL; + newsk->wmem_alloc = 0; + newsk->rmem_alloc = 0; + newsk->localroute = sk->localroute; + + newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; + + newsk->err = 0; + newsk->shutdown = 0; + newsk->ack_backlog = 0; + newsk->acked_seq = skb->h.th->seq+1; + newsk->copied_seq = skb->h.th->seq+1; + newsk->fin_seq = skb->h.th->seq; + newsk->state = TCP_SYN_RECV; + newsk->timeout = 0; + newsk->ip_xmit_timeout = 0; + newsk->write_seq = seq; + newsk->window_seq = newsk->write_seq; + newsk->rcv_ack_seq = newsk->write_seq; + newsk->urg_data = 0; + newsk->retransmits = 0; + newsk->linger=0; + newsk->destroy = 0; + init_timer(&newsk->timer); + newsk->timer.data = (unsigned long)newsk; + newsk->timer.function = &net_timer; + init_timer(&newsk->retransmit_timer); + 
newsk->retransmit_timer.data = (unsigned long)newsk; + newsk->retransmit_timer.function=&retransmit_timer; + newsk->dummy_th.source = skb->h.th->dest; + newsk->dummy_th.dest = skb->h.th->source; + + /* + * Swap these two, they are from our point of view. + */ + + newsk->daddr = saddr; + newsk->saddr = daddr; + + put_sock(newsk->num,newsk); + newsk->dummy_th.res1 = 0; + newsk->dummy_th.doff = 6; + newsk->dummy_th.fin = 0; + newsk->dummy_th.syn = 0; + newsk->dummy_th.rst = 0; + newsk->dummy_th.psh = 0; + newsk->dummy_th.ack = 0; + newsk->dummy_th.urg = 0; + newsk->dummy_th.res2 = 0; + newsk->acked_seq = skb->h.th->seq + 1; + newsk->copied_seq = skb->h.th->seq + 1; + newsk->socket = NULL; + + /* + * Grab the ttl and tos values and use them + */ + + newsk->ip_ttl=sk->ip_ttl; + newsk->ip_tos=skb->ip_hdr->tos; + + /* + * Use 512 or whatever user asked for + */ + + /* + * Note use of sk->user_mss, since user has no direct access to newsk + */ + + rt=ip_rt_route(saddr, NULL,NULL); + + if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) + newsk->window_clamp = rt->rt_window; + else + newsk->window_clamp = 0; + + if (sk->user_mss) + newsk->mtu = sk->user_mss; + else if(rt!=NULL && (rt->rt_flags&RTF_MSS)) + newsk->mtu = rt->rt_mss - HEADER_SIZE; + else + { +#ifdef CONFIG_INET_SNARL /* Sub Nets Are Local */ + if ((saddr ^ daddr) & default_mask(saddr)) +#else + if ((saddr ^ daddr) & dev->pa_mask) +#endif + newsk->mtu = 576 - HEADER_SIZE; + else + newsk->mtu = MAX_WINDOW; + } + + /* + * But not bigger than device MTU + */ + + newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE); + + /* + * This will min with what arrived in the packet + */ + + tcp_options(newsk,skb->h.th); + + buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); + if (buff == NULL) + { + sk->err = ENOMEM; + newsk->dead = 1; + newsk->state = TCP_CLOSE; + /* And this will destroy it */ + release_sock(newsk); + kfree_skb(skb, FREE_READ); + tcp_statistics.TcpAttemptFails++; + return; + } + + buff->len = 
sizeof(struct tcphdr)+4; + buff->sk = newsk; + buff->localroute = newsk->localroute; + + t1 =(struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev, + IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl); + + /* + * Something went wrong. + */ + + if (tmp < 0) + { + sk->err = -tmp; + buff->free = 1; + kfree_skb(buff,FREE_WRITE); + newsk->dead = 1; + newsk->state = TCP_CLOSE; + release_sock(newsk); + skb->sk = sk; + kfree_skb(skb, FREE_READ); + tcp_statistics.TcpAttemptFails++; + return; + } + + buff->len += tmp; + t1 =(struct tcphdr *)((char *)t1 +tmp); + + memcpy(t1, skb->h.th, sizeof(*t1)); + buff->h.seq = newsk->write_seq; + /* + * Swap the send and the receive. + */ + t1->dest = skb->h.th->source; + t1->source = newsk->dummy_th.source; + t1->seq = ntohl(newsk->write_seq++); + t1->ack = 1; + newsk->window = tcp_select_window(newsk); + newsk->sent_seq = newsk->write_seq; + t1->window = ntohs(newsk->window); + t1->res1 = 0; + t1->res2 = 0; + t1->rst = 0; + t1->urg = 0; + t1->psh = 0; + t1->syn = 1; + t1->ack_seq = ntohl(skb->h.th->seq+1); + t1->doff = sizeof(*t1)/4+1; + ptr =(unsigned char *)(t1+1); + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = ((newsk->mtu) >> 8) & 0xff; + ptr[3] =(newsk->mtu) & 0xff; + + tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk); + newsk->prot->queue_xmit(newsk, ndev, buff, 0); + reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT); + skb->sk = newsk; + + /* + * Charge the sock_buff to newsk. + */ + + sk->rmem_alloc -= skb->mem_len; + newsk->rmem_alloc += skb->mem_len; + + skb_queue_tail(&sk->receive_queue,skb); + sk->ack_backlog++; + release_sock(newsk); + tcp_statistics.TcpOutSegs++; +} + + +static void tcp_close(struct sock *sk, int timeout) +{ + /* + * We need to grab some memory, and put together a FIN, + * and then put it into the queue to be sent. 
+ */ + + sk->inuse = 1; + + if(sk->state == TCP_LISTEN) + { + /* Special case */ + tcp_set_state(sk, TCP_CLOSE); + tcp_close_pending(sk); + release_sock(sk); + return; + } + + sk->keepopen = 1; + sk->shutdown = SHUTDOWN_MASK; + + if (!sk->dead) + sk->state_change(sk); + + if (timeout == 0) + { + struct sk_buff *skb; + + /* + * We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! + */ + + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) + kfree_skb(skb, FREE_READ); + /* + * Get rid off any half-completed packets. + */ + + if (sk->partial) + tcp_send_partial(sk); + } + + + /* + * Timeout is not the same thing - however the code likes + * to send both the same way (sigh). + */ + + if(timeout) + { + tcp_set_state(sk, TCP_CLOSE); /* Dead */ + } + else + { + if(tcp_close_state(sk,1)==1) + { + tcp_send_fin(sk); + } + } + release_sock(sk); +} + + +/* + * This routine takes stuff off of the write queue, + * and puts it in the xmit queue. This happens as incoming acks + * open up the remote window for us. + */ + +static void tcp_write_xmit(struct sock *sk) +{ + struct sk_buff *skb; + + /* + * The bytes will have to remain here. In time closedown will + * empty the write queue and all will be happy + */ + + if(sk->zapped) + return; + + /* + * Anything on the transmit queue that fits the window can + * be added providing we are not + * + * a) retransmitting (Nagle's rule) + * b) exceeding our congestion window. + */ + + while((skb = skb_peek(&sk->write_queue)) != NULL && + before(skb->h.seq, sk->window_seq + 1) && + (sk->retransmits == 0 || + sk->ip_xmit_timeout != TIME_WRITE || + before(skb->h.seq, sk->rcv_ack_seq + 1)) + && sk->packets_out < sk->cong_window) + { + IS_SKB(skb); + skb_unlink(skb); + + /* + * See if we really need to send the packet. + */ + + if (before(skb->h.seq, sk->rcv_ack_seq +1)) + { + /* + * This is acked data. We can discard it. 
This + * cannot currently occur. + */ + + sk->retransmits = 0; + kfree_skb(skb, FREE_WRITE); + if (!sk->dead) + sk->write_space(sk); + } + else + { + struct tcphdr *th; + struct iphdr *iph; + int size; +/* + * put in the ack seq and window at this point rather than earlier, + * in order to keep them monotonic. We really want to avoid taking + * back window allocations. That's legal, but RFC1122 says it's frowned on. + * Ack and window will in general have changed since this packet was put + * on the write queue. + */ + iph = (struct iphdr *)(skb->data + + skb->dev->hard_header_len); + th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); + size = skb->len - (((unsigned char *) th) - skb->data); + + th->ack_seq = ntohl(sk->acked_seq); + th->window = ntohs(tcp_select_window(sk)); + + tcp_send_check(th, sk->saddr, sk->daddr, size, sk); + + sk->sent_seq = skb->h.seq; + + /* + * IP manages our queue for some crazy reason + */ + + sk->prot->queue_xmit(sk, skb->dev, skb, skb->free); + + /* + * Again we slide the timer wrongly + */ + + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + } + } +} + + +/* + * This routine deals with incoming acks, but not outgoing ones. 
+ *
+ *	th is the TCP header of the received segment; th->ack_seq and
+ *	th->window are converted here with ntohl()/ntohs().  len is
+ *	compared with th->doff*4 to tell whether the segment carried
+ *	anything beyond its header.  saddr is not referenced in the body.
+ *
+ *	Returns 0 only when the ack is beyond sk->sent_seq (it acks
+ *	something we never sent).  Returns 1 in every other case,
+ *	including a zapped socket and an ack older than sk->rcv_ack_seq.
+ */
+
+extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
+{
+	unsigned long ack;
+	int flag = 0;	/* bit set described just below; tested at the very end */
+
+	/*
+	 * 1 - there was data in packet as well as ack or new data is sent or
+	 *     in shutdown state
+	 * 2 - data from retransmit queue was acked and removed
+	 * 4 - window shrunk or data from retransmit queue was acked and removed
+	 */
+
+	if(sk->zapped)
+		return(1);	/* Dead, can't ack any more so why bother */
+
+	/*
+	 *	Have we discovered a larger window
+	 */
+
+	ack = ntohl(th->ack_seq);
+
+	if (ntohs(th->window) > sk->max_window)
+	{
+		sk->max_window = ntohs(th->window);
+#ifdef CONFIG_INET_PCTCP
+		/* Hack because we don't send partial packets to non SWS
+		   handling hosts */
+		sk->mss = min(sk->max_window>>1, sk->mtu);
+#else
+		sk->mss = min(sk->max_window, sk->mtu);
+#endif
+	}
+
+	/*
+	 *	We have dropped back to keepalive timeouts. Thus we have
+	 *	no retransmits pending.
+	 */
+
+	if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
+		sk->retransmits = 0;
+
+	/*
+	 *	If the ack is newer than sent or older than previous acks
+	 *	then we can probably ignore it.
+	 */
+
+	if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq))
+	{
+		if(sk->debug)
+			printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
+
+		/*
+		 *	Keepalive processing.  An ack for data we have not
+		 *	sent is the only case reported to the caller as 0.
+		 */
+
+		if (after(ack, sk->sent_seq))
+		{
+			return(0);
+		}
+
+		/*
+		 *	Restart the keepalive timer.
+		 */
+
+		if (sk->keepopen)
+		{
+			if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
+				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+		}
+		return(1);
+	}
+
+	/*
+	 *	If there is data set flag 1
+	 */
+
+	if (len != th->doff*4)
+		flag |= 1;
+
+	/*
+	 *	See if our window has been shrunk.
+	 */
+
+	if (after(sk->window_seq, ack+ntohs(th->window)))
+	{
+		/*
+		 * We may need to move packets from the send queue
+		 * to the write queue, if the window has been shrunk on us.
+		 * The RFC says you are not allowed to shrink your window
+		 * like this, but if the other end does, you must be able
+		 * to deal with it.
+		 */
+		struct sk_buff *skb;
+		struct sk_buff *skb2;
+		struct sk_buff *wskb = NULL;	/* last packet put back on the write queue */
+
+		skb2 = sk->send_head;
+		sk->send_head = NULL;
+		sk->send_tail = NULL;
+
+		/*
+		 *	This is an artifact of a flawed concept. We want one
+		 *	queue and a smarter send routine when we send all.
+		 */
+
+		flag |= 4;	/* Window changed */
+
+		sk->window_seq = ack + ntohs(th->window);
+		cli();
+		while (skb2 != NULL)
+		{
+			skb = skb2;
+			skb2 = skb->link3;
+			skb->link3 = NULL;
+			if (after(skb->h.seq, sk->window_seq))
+			{
+				if (sk->packets_out > 0)
+					sk->packets_out--;
+				/* We may need to remove this from the dev send list. */
+				if (skb->next != NULL)
+				{
+					skb_unlink(skb);
+				}
+				/* Now add it to the write_queue.  The first one
+				   goes to the head and later ones follow it, so
+				   their relative order is kept. */
+				if (wskb == NULL)
+					skb_queue_head(&sk->write_queue,skb);
+				else
+					skb_append(wskb,skb);
+				wskb = skb;
+			}
+			else
+			{
+				/* Still inside the window: rebuild the send list. */
+				if (sk->send_head == NULL)
+				{
+					sk->send_head = skb;
+					sk->send_tail = skb;
+				}
+				else
+				{
+					sk->send_tail->link3 = skb;
+					sk->send_tail = skb;
+				}
+				skb->link3 = NULL;
+			}
+		}
+		sti();
+	}
+
+	/*
+	 *	Pipe has emptied
+	 */
+
+	if (sk->send_tail == NULL || sk->send_head == NULL)
+	{
+		sk->send_head = NULL;
+		sk->send_tail = NULL;
+		sk->packets_out= 0;
+	}
+
+	/*
+	 *	Update the right hand window edge of the host
+	 */
+
+	sk->window_seq = ack + ntohs(th->window);
+
+	/*
+	 *	We don't want too many packets out there.
+	 */
+
+	if (sk->ip_xmit_timeout == TIME_WRITE &&
+		sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq))
+	{
+		/*
+		 * This is Jacobson's slow start and congestion avoidance.
+		 * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
+		 * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a
+		 * counter and increment it once every cwnd times.  It's possible
+		 * that this should be done only if sk->retransmits == 0.  I'm
+		 * interpreting "new data is acked" as including data that has
+		 * been retransmitted but is just now being acked.
+		 */
+		if (sk->cong_window < sk->ssthresh)
+			/*
+			 *	In "safe" area, increase
+			 */
+			sk->cong_window++;
+		else
+		{
+			/*
+			 *	In dangerous area, increase slowly.  In theory this is
+			 *	sk->cong_window += 1 / sk->cong_window
+			 */
+			if (sk->cong_count >= sk->cong_window)
+			{
+				sk->cong_window++;
+				sk->cong_count = 0;
+			}
+			else
+				sk->cong_count++;
+		}
+	}
+
+	/*
+	 *	Remember the highest ack received.
+	 */
+
+	sk->rcv_ack_seq = ack;
+
+	/*
+	 *	If this ack opens up a zero window, clear backoff.  It was
+	 *	being used to time the probes, and is probably far higher than
+	 *	it needs to be for normal retransmission.
+	 */
+
+	if (sk->ip_xmit_timeout == TIME_PROBE0)
+	{
+		sk->retransmits = 0;	/* Our probe was answered */
+
+		/*
+		 *	Was it a usable window open ?
+		 */
+
+		if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
+		    ! before (sk->window_seq, sk->write_queue.next->h.seq))
+		{
+			sk->backoff = 0;
+
+			/*
+			 *	Recompute rto from rtt.  this eliminates any backoff.
+			 */
+
+			sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
+			if (sk->rto > 120*HZ)
+				sk->rto = 120*HZ;
+			if (sk->rto < 20)	/* Was 1*HZ, then 1 - turns out we must allow about
+						   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
+						   .2 of a second is going to need huge windows (SIGH) */
+				sk->rto = 20;
+		}
+	}
+
+	/*
+	 *	See if we can take anything off of the retransmit queue.
+	 */
+
+	while(sk->send_head != NULL)
+	{
+		/* Check for a bug. */
+		if (sk->send_head->link3 &&
+		    after(sk->send_head->h.seq, sk->send_head->link3->h.seq))
+			printk("INET: tcp.c: *** bug send_list out of order.\n");
+
+		/*
+		 *	If our packet is before the ack sequence we can
+		 *	discard it as it's confirmed to have arrived the other end.
+		 */
+
+		if (before(sk->send_head->h.seq, ack+1))
+		{
+			struct sk_buff *oskb;
+			if (sk->retransmits)
+			{
+				/*
+				 *	We were retransmitting.  don't count this in RTT est
+				 */
+				flag |= 2;
+
+				/*
+				 * even though we've gotten an ack, we're still
+				 * retransmitting as long as we're sending from
+				 * the retransmit queue.  Keeping retransmits non-zero
+				 * prevents us from getting new data interspersed with
+				 * retransmissions.
+				 */
+
+				if (sk->send_head->link3)	/* Any more queued retransmits? */
+					sk->retransmits = 1;
+				else
+					sk->retransmits = 0;
+			}
+			/*
+			 * Note that we only reset backoff and rto in the
+			 * rtt recomputation code.  And that doesn't happen
+			 * if there were retransmissions in effect.  So the
+			 * first new packet after the retransmissions is
+			 * sent with the backoff still in effect.  Not until
+			 * we get an ack from a non-retransmitted packet do
+			 * we reset the backoff and rto.  This allows us to deal
+			 * with a situation where the network delay has increased
+			 * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
+			 */
+
+			/*
+			 *	We have one less packet out there.
+			 */
+
+			if (sk->packets_out > 0)
+				sk->packets_out --;
+			/*
+			 *	Wake up the process, it can probably write more.
+			 */
+			if (!sk->dead)
+				sk->write_space(sk);
+			oskb = sk->send_head;
+
+			if (!(flag&2))	/* Not retransmitting */
+			{
+				long m;
+
+				/*
+				 *	The following amusing code comes from Jacobson's
+				 *	article in SIGCOMM '88.  Note that rtt and mdev
+				 *	are scaled versions of rtt and mean deviation.
+				 *	This is designed to be as fast as possible
+				 *	m stands for "measurement".
+				 */
+
+				m = jiffies - oskb->when;  /* RTT */
+				if(m<=0)
+					m=1;		/* IS THIS RIGHT FOR <0 ??? */
+				m -= (sk->rtt >> 3);    /* m is now error in rtt est */
+				sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
+				if (m < 0)
+					m = -m;		/* m is now abs(error) */
+				m -= (sk->mdev >> 2);   /* similar update on mdev */
+				sk->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
+
+				/*
+				 *	Now update timeout.  Note that this removes any backoff.
+				 */
+
+				sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
+				if (sk->rto > 120*HZ)
+					sk->rto = 120*HZ;
+				if (sk->rto < 20)	/* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
+					sk->rto = 20;
+				sk->backoff = 0;
+			}
+			flag |= (2|4);	/* 2 is really more like 'don't adjust the rtt
+					   In this case as we just set it up */
+			cli();
+			oskb = sk->send_head;
+			IS_SKB(oskb);
+			sk->send_head = oskb->link3;
+			if (sk->send_head == NULL)
+			{
+				sk->send_tail = NULL;
+			}
+
+		/*
+		 *	We may need to remove this from the dev send list.
+		 */
+
+			if (oskb->next)
+				skb_unlink(oskb);
+			sti();
+			kfree_skb(oskb, FREE_WRITE); /* write. */
+			if (!sk->dead)
+				sk->write_space(sk);
+		}
+		else
+		{
+			break;
+		}
+	}
+
+	/*
+	 * XXX someone ought to look at this too.. at the moment, if skb_peek()
+	 * returns non-NULL, we completely ignore the timer stuff in the else
+	 * clause.  We ought to organize the code so that else clause can
+	 * (should) be executed regardless, possibly moving the PROBE timer
+	 * reset over.  The skb_peek() thing should only move stuff to the
+	 * write queue, NOT also manage the timer functions.
+	 */
+
+	/*
+	 * Maybe we can take some stuff off of the write queue,
+	 * and put it onto the xmit queue.  The test below repeats the
+	 * loop condition used inside tcp_write_xmit().
+	 */
+	if (skb_peek(&sk->write_queue) != NULL)
+	{
+		if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
+		        (sk->retransmits == 0 ||
+			 sk->ip_xmit_timeout != TIME_WRITE ||
+			 before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
+			&& sk->packets_out < sk->cong_window)
+		{
+			/*
+			 *	Add more data to the send queue.
+			 */
+			flag |= 1;
+			tcp_write_xmit(sk);
+		}
+		else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
+			sk->send_head == NULL &&
+			sk->ack_backlog == 0 &&
+			sk->state != TCP_TIME_WAIT)
+		{
+			/*
+			 *	Data to queue but no room.
+			 */
+			reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+		}
+	}
+	else
+	{
+		/*
+		 * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
+		 * from TCP_CLOSE we don't do anything
+		 *
+		 * from anything else, if there is write data (or fin) pending,
+		 * we use a TIME_WRITE timeout, else if keepalive we reset to
+		 * a KEEPALIVE timeout, else we delete the timer.
+		 *
+		 * We do not set flag for nominal write data, otherwise we may
+		 * force a state where we start to write itsy bitsy tidbits
+		 * of data.
+		 */
+
+		switch(sk->state) {
+		case TCP_TIME_WAIT:
+			/*
+			 * keep us in TIME_WAIT until we stop getting packets,
+			 * reset the timeout.
+			 */
+			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+			break;
+		case TCP_CLOSE:
+			/*
+			 * don't touch the timer.
+			 */
+			break;
+		default:
+			/*
+			 *	Must check send_head, write_queue, and ack_backlog
+			 *	to determine which timeout to use.
+			 */
+			if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
+				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+			} else if (sk->keepopen) {
+				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+			} else {
+				del_timer(&sk->retransmit_timer);
+				sk->ip_xmit_timeout = 0;
+			}
+			break;
+		}
+	}
+
+	/*
+	 *	We have nothing queued but space to send. Send any partial
+	 *	packets immediately (end of Nagle rule application).
+	 */
+
+	if (sk->packets_out == 0 && sk->partial != NULL &&
+		skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL)
+	{
+		flag |= 1;
+		tcp_send_partial(sk);
+	}
+
+	/*
+	 * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
+	 * we are now waiting for an acknowledge to our FIN.  The other end is
+	 * already in TIME_WAIT.
+	 *
+	 * Move to TCP_CLOSE on success.
+	 */
+
+	if (sk->state == TCP_LAST_ACK)
+	{
+		if (!sk->dead)
+			sk->state_change(sk);
+		if(sk->debug)
+			printk("rcv_ack_seq: %lX==%lX, acked_seq: %lX==%lX\n",
+				sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
+		if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/)
+		{
+			flag |= 1;
+			tcp_set_state(sk,TCP_CLOSE);
+			sk->shutdown = SHUTDOWN_MASK;
+		}
+	}
+
+	/*
+	 *	Incoming ACK to a FIN we sent in the case of our initiating the close.
+	 *
+	 *	Move to FIN_WAIT2 to await a FIN from the other end. Set
+	 *	SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
+	 */
+
+	if (sk->state == TCP_FIN_WAIT1)
+	{
+
+		if (!sk->dead)
+			sk->state_change(sk);
+		if (sk->rcv_ack_seq == sk->write_seq)
+		{
+			flag |= 1;
+			sk->shutdown |= SEND_SHUTDOWN;
+			tcp_set_state(sk, TCP_FIN_WAIT2);
+		}
+	}
+
+	/*
+	 *	Incoming ACK to a FIN we sent in the case of a simultaneous close.
+	 *
+	 *	Move to TIME_WAIT
+	 */
+
+	if (sk->state == TCP_CLOSING)
+	{
+
+		if (!sk->dead)
+			sk->state_change(sk);
+		if (sk->rcv_ack_seq == sk->write_seq)
+		{
+			flag |= 1;
+			tcp_time_wait(sk);
+		}
+	}
+
+	/*
+	 *	Final ack of a three way shake
+	 */
+
+	if(sk->state==TCP_SYN_RECV)
+	{
+		tcp_set_state(sk, TCP_ESTABLISHED);
+		tcp_options(sk,th);
+		sk->dummy_th.dest=th->source;
+		sk->copied_seq = sk->acked_seq;
+		if(!sk->dead)
+			sk->state_change(sk);
+		if(sk->max_window==0)
+		{
+			sk->max_window=32;	/* Sanity check */
+			sk->mss=min(sk->max_window,sk->mtu);
+		}
+	}
+
+	/*
+	 * I make no guarantees about the first clause in the following
+	 * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
+	 * what conditions "!flag" would be true.  However I think the rest
+	 * of the conditions would prevent that from causing any
+	 * unnecessary retransmission.
+	 *   Clearly if the first packet has expired it should be
+	 * retransmitted.  The other alternative, "flag&2 && retransmits", is
+	 * harder to explain:  You have to look carefully at how and when the
+	 * timer is set and with what timeout.  The most recent transmission always
+	 * sets the timer.  So in general if the most recent thing has timed
+	 * out, everything before it has as well.  So we want to go ahead and
+	 * retransmit some more.  If we didn't explicitly test for this
+	 * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
+	 * would not be true.  If you look at the pattern of timing, you can
+	 * show that rto is increased fast enough that the next packet would
+	 * almost never be retransmitted immediately.  Then you'd end up
+	 * waiting for a timeout to send each packet on the retransmission
+	 * queue.  With my implementation of the Karn sampling algorithm,
+	 * the timeout would double each time.  The net result is that it would
+	 * take a hideous amount of time to recover from a single dropped packet.
+	 * It's possible that there should also be a test for TIME_WRITE, but
+	 * I think as long as "send_head != NULL" and "retransmit" is on, we've
+	 * got to be in real retransmission mode.
+	 *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
+	 * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
+	 * As long as no further losses occur, this seems reasonable.
+	 */
+
+	if (((!flag) || (flag&4)) && sk->send_head != NULL &&
+	       (((flag&2) && sk->retransmits) ||
+	       (sk->send_head->when + sk->rto < jiffies)))
+	{
+		if(sk->send_head->when + sk->rto < jiffies)
+			tcp_retransmit(sk,0);
+		else
+		{
+			tcp_do_retransmit(sk, 1);
+			reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+		}
+	}
+
+	return(1);
+}
+
+
+/*
+ *	Process the FIN bit. This now behaves as it is supposed to work
+ *	and the FIN takes effect when it is validly part of sequence
+ *	space. Not before when we get holes.
+ *
+ *	State moves made here:
+ *	  SYN_RECV, SYN_SENT, ESTABLISHED -> CLOSE_WAIT
+ *	  FIN_WAIT1                       -> CLOSING  (simultaneous close)
+ *	  FIN_WAIT2                       -> TIME_WAIT
+ *	  TIME_WAIT                       -> stays, MSL timer restarted
+ *	  CLOSE_WAIT, CLOSING, CLOSE      -> unchanged
+ *	  anything else                   -> LAST_ACK, MSL timer started
+ *
+ *	sk->fin_seq is set from the header, and a socket that is not dead
+ *	gets state_change() plus an async wakeup.  Always returns 0.
+ */
+
+static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+{
+	sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
+
+	if (!sk->dead)
+	{
+		sk->state_change(sk);
+		sock_wake_async(sk->socket, 1);
+	}
+
+	switch (sk->state)
+	{
+		case TCP_SYN_RECV:
+		case TCP_SYN_SENT:
+		case TCP_ESTABLISHED:
+			/* tcp_data() already handled sending the ack. */
+			tcp_set_state(sk, TCP_CLOSE_WAIT);
+			if (th->rst)
+				sk->shutdown = SHUTDOWN_MASK;
+			break;
+
+		case TCP_CLOSE_WAIT:
+		case TCP_CLOSING:
+		case TCP_CLOSE:
+			/* Retransmitted FIN, or already closed: nothing to do. */
+			break;
+
+		case TCP_TIME_WAIT:
+			/* Retransmitted FIN: restart the TIME_WAIT timer. */
+			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+			break;
+
+		case TCP_FIN_WAIT1:
+			/*
+			 * Simultaneous close: the received FIN is acked and
+			 * we enter CLOSING.  A WRITE timeout is armed unless
+			 * one is already pending; it will either move us on
+			 * to TIME_WAIT or resend the FIN.
+			 */
+			if (sk->ip_xmit_timeout != TIME_WRITE)
+				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+			tcp_set_state(sk, TCP_CLOSING);
+			break;
+
+		case TCP_FIN_WAIT2:
+			/* Received a FIN: enter TIME_WAIT. */
+			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+			sk->shutdown |= SHUTDOWN_MASK;
+			tcp_set_state(sk, TCP_TIME_WAIT);
+			break;
+
+		default:
+			tcp_set_state(sk, TCP_LAST_ACK);
+
+			/* Start the timers. */
+			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+			break;
+	}
+
+	return(0);
+}
+
+
+
+/*
+ *	This routine handles the data.  If there is room in the buffer,
+ *	it will be have already been moved into it.  If there is no
+ *	room, then we will just have to discard the packet.
+ */
+
+extern __inline__ int tcp_data(struct sk_buff *skb, struct sock *sk,
+	 unsigned long saddr, unsigned short len)
+{
+	struct sk_buff *skb1, *skb2;
+	struct tcphdr *th;
+	int dup_dumped=0;
+	unsigned long new_seq;
+	unsigned long shut_seq;
+
+	th = skb->h.th;
+	skb->len = len -(th->doff*4);
+
+	/*
+	 *	The bytes in the receive read/assembly queue has increased. Needed for the
+	 *	low memory discard algorithm
+	 */
+
+	sk->bytes_rcv += skb->len;
+
+	if (skb->len == 0 && !th->fin)
+	{
+		/*
+		 *	Don't want to keep passing ack's back and forth.
+		 *	(someone sent us dataless, boring frame)
+		 */
+		if (!th->ack)
+			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
+		kfree_skb(skb, FREE_READ);
+		return(0);
+	}
+
+	/*
+	 *	We no longer have anyone receiving data on this connection.
+	 */
+
+#ifndef TCP_DONT_RST_SHUTDOWN
+
+	if(sk->shutdown & RCV_SHUTDOWN)
+	{
+		/*
+		 *	FIXME: BSD has some magic to avoid sending resets to
+		 *	broken 4.2 BSD keepalives. Much to my surprise a few non
+		 *	BSD stacks still have broken keepalives so we want to
+		 *	cope with it.
+ */ + + if(skb->len) /* We don't care if it's just an ack or + a keepalive/window probe */ + { + new_seq= th->seq + skb->len + th->syn; /* Right edge of _data_ part of frame */ + + /* Do this the way 4.4BSD treats it. Not what I'd + regard as the meaning of the spec but it's what BSD + does and clearly they know everything 8) */ + + /* + * This is valid because of two things + * + * a) The way tcp_data behaves at the bottom. + * b) A fin takes effect when read not when received. + */ + + shut_seq=sk->acked_seq+1; /* Last byte */ + + if(after(new_seq,shut_seq)) + { + if(sk->debug) + printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n", + sk, new_seq, shut_seq, sk->blog); + if(sk->dead) + { + sk->acked_seq = new_seq + th->fin; + tcp_reset(sk->saddr, sk->daddr, skb->h.th, + sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); + tcp_statistics.TcpEstabResets++; + tcp_set_state(sk,TCP_CLOSE); + sk->err = EPIPE; + sk->shutdown = SHUTDOWN_MASK; + kfree_skb(skb, FREE_READ); + return 0; + } + } + } + } + +#endif + + /* + * Now we have to walk the chain, and figure out where this one + * goes into it. This is set up so that the last packet we received + * will be the first one we look at, that way if everything comes + * in order, there will be no performance loss, and if they come + * out of order we will be able to fit things in nicely. + * + * [AC: This is wrong. We should assume in order first and then walk + * forwards from the first hole based upon real traffic patterns.] 
+ * + */ + + if (skb_peek(&sk->receive_queue) == NULL) /* Empty queue is easy case */ + { + skb_queue_head(&sk->receive_queue,skb); + skb1= NULL; + } + else + { + for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) + { + if(sk->debug) + { + printk("skb1=%p :", skb1); + printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq); + printk("skb->h.th->seq = %ld\n",skb->h.th->seq); + printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq, + sk->acked_seq); + } + + /* + * Optimisation: Duplicate frame or extension of previous frame from + * same sequence point (lost ack case). + * The frame contains duplicate data or replaces a previous frame + * discard the previous frame (safe as sk->inuse is set) and put + * the new one in its place. + */ + + if (th->seq==skb1->h.th->seq && skb->len>= skb1->len) + { + skb_append(skb1,skb); + skb_unlink(skb1); + kfree_skb(skb1,FREE_READ); + dup_dumped=1; + skb1=NULL; + break; + } + + /* + * Found where it fits + */ + + if (after(th->seq+1, skb1->h.th->seq)) + { + skb_append(skb1,skb); + break; + } + + /* + * See if we've hit the start. If so insert. + */ + if (skb1 == skb_peek(&sk->receive_queue)) + { + skb_queue_head(&sk->receive_queue, skb); + break; + } + } + } + + /* + * Figure out what the ack value for this frame is + */ + + th->ack_seq = th->seq + skb->len; + if (th->syn) + th->ack_seq++; + if (th->fin) + th->ack_seq++; + + if (before(sk->acked_seq, sk->copied_seq)) + { + printk("*** tcp.c:tcp_data bug acked < copied\n"); + sk->acked_seq = sk->copied_seq; + } + + /* + * Now figure out if we can ack anything. This is very messy because we really want two + * receive queues, a completed and an assembly queue. We also want only one transmit + * queue. 
+ */ + + if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) + { + if (before(th->seq, sk->acked_seq+1)) + { + int newwindow; + + if (after(th->ack_seq, sk->acked_seq)) + { + newwindow = sk->window-(th->ack_seq - sk->acked_seq); + if (newwindow < 0) + newwindow = 0; + sk->window = newwindow; + sk->acked_seq = th->ack_seq; + } + skb->acked = 1; + + /* + * When we ack the fin, we do the FIN + * processing. + */ + + if (skb->h.th->fin) + { + tcp_fin(skb,sk,skb->h.th); + } + + for(skb2 = skb->next; + skb2 != (struct sk_buff *)&sk->receive_queue; + skb2 = skb2->next) + { + if (before(skb2->h.th->seq, sk->acked_seq+1)) + { + if (after(skb2->h.th->ack_seq, sk->acked_seq)) + { + newwindow = sk->window - + (skb2->h.th->ack_seq - sk->acked_seq); + if (newwindow < 0) + newwindow = 0; + sk->window = newwindow; + sk->acked_seq = skb2->h.th->ack_seq; + } + skb2->acked = 1; + /* + * When we ack the fin, we do + * the fin handling. + */ + if (skb2->h.th->fin) + { + tcp_fin(skb,sk,skb->h.th); + } + + /* + * Force an immediate ack. + */ + + sk->ack_backlog = sk->max_ack_backlog; + } + else + { + break; + } + } + + /* + * This also takes care of updating the window. + * This if statement needs to be simplified. + */ + if (!sk->delay_acks || + sk->ack_backlog >= sk->max_ack_backlog || + sk->bytes_rcv > sk->max_unacked || th->fin) { + /* tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */ + } + else + { + sk->ack_backlog++; + if(sk->debug) + printk("Ack queued.\n"); + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); + } + } + } + + /* + * If we've missed a packet, send an ack. + * Also start a timer to send another. + */ + + if (!skb->acked) + { + + /* + * This is important. If we don't have much room left, + * we need to throw out a few packets so we have a good + * window. Note that mtu is used, not mss, because mss is really + * for the send side. He could be sending us stuff as large as mtu. 
+ */ + + while (sk->prot->rspace(sk) < sk->mtu) + { + skb1 = skb_peek(&sk->receive_queue); + if (skb1 == NULL) + { + printk("INET: tcp.c:tcp_data memory leak detected.\n"); + break; + } + + /* + * Don't throw out something that has been acked. + */ + + if (skb1->acked) + { + break; + } + + skb_unlink(skb1); + kfree_skb(skb1, FREE_READ); + } + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); + sk->ack_backlog++; + reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME); + } + else + { + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); + } + + /* + * Now tell the user we may have some data. + */ + + if (!sk->dead) + { + if(sk->debug) + printk("Data wakeup.\n"); + sk->data_ready(sk,0); + } + return(0); +} + + +/* + * This routine is only called when we have urgent data + * signalled. Its the 'slow' part of tcp_urg. It could be + * moved inline now as tcp_urg is only called from one + * place. We handle URGent data wrong. We have to - as + * BSD still doesn't use the correction from RFC961. + */ + +static void tcp_check_urg(struct sock * sk, struct tcphdr * th) +{ + unsigned long ptr = ntohs(th->urg_ptr); + + if (ptr) + ptr--; + ptr += th->seq; + + /* ignore urgent data that we've already seen and read */ + if (after(sk->copied_seq, ptr)) + return; + + /* do we already have a newer (or duplicate) urgent pointer? */ + if (sk->urg_data && !after(ptr, sk->urg_seq)) + return; + + /* tell the world about our new urgent pointer */ + if (sk->proc != 0) { + if (sk->proc > 0) { + kill_proc(sk->proc, SIGURG, 1); + } else { + kill_pg(-sk->proc, SIGURG, 1); + } + } + sk->urg_data = URG_NOTYET; + sk->urg_seq = ptr; +} + +/* + * This is the 'fast' part of urgent handling. + */ + +extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th, + unsigned long saddr, unsigned long len) +{ + unsigned long ptr; + + /* + * Check if we get a new urgent pointer - normally not + */ + + if (th->urg) + tcp_check_urg(sk,th); + + /* + * Do we wait for any urgent data? 
- normally not + */ + + if (sk->urg_data != URG_NOTYET) + return 0; + + /* + * Is the urgent pointer pointing into this packet? + */ + + ptr = sk->urg_seq - th->seq + th->doff*4; + if (ptr >= len) + return 0; + + /* + * Ok, got the correct packet, update info + */ + + sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th); + if (!sk->dead) + sk->data_ready(sk,0); + return 0; +} + +/* + * This will accept the next outstanding connection. + */ + +static struct sock *tcp_accept(struct sock *sk, int flags) +{ + struct sock *newsk; + struct sk_buff *skb; + + /* + * We need to make sure that this socket is listening, + * and that it has something pending. + */ + + if (sk->state != TCP_LISTEN) + { + sk->err = EINVAL; + return(NULL); + } + + /* Avoid the race. */ + cli(); + sk->inuse = 1; + + while((skb = tcp_dequeue_established(sk)) == NULL) + { + if (flags & O_NONBLOCK) + { + sti(); + release_sock(sk); + sk->err = EAGAIN; + return(NULL); + } + + release_sock(sk); + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + sk->err = ERESTARTSYS; + return(NULL); + } + sk->inuse = 1; + } + sti(); + + /* + * Now all we need to do is return skb->sk. + */ + + newsk = skb->sk; + + kfree_skb(skb, FREE_READ); + sk->ack_backlog--; + release_sock(sk); + return(newsk); +} + + +/* + * This will initiate an outgoing connection. + */ + +static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) +{ + struct sk_buff *buff; + struct device *dev=NULL; + unsigned char *ptr; + int tmp; + int atype; + struct tcphdr *t1; + struct rtable *rt; + + if (sk->state != TCP_CLOSE) + { + return(-EISCONN); + } + + if (addr_len < 8) + return(-EINVAL); + + if (usin->sin_family && usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + + /* + * connect() to INADDR_ANY means loopback (BSD'ism). 
+ */ + + if(usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); + + /* + * Don't want a TCP connection going to a broadcast address + */ + + if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) + return -ENETUNREACH; + + sk->inuse = 1; + sk->daddr = usin->sin_addr.s_addr; + sk->write_seq = tcp_init_seq(); + sk->window_seq = sk->write_seq; + sk->rcv_ack_seq = sk->write_seq -1; + sk->err = 0; + sk->dummy_th.dest = usin->sin_port; + release_sock(sk); + + buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL); + if (buff == NULL) + { + return(-ENOMEM); + } + sk->inuse = 1; + buff->len = 24; + buff->sk = sk; + buff->free = 0; + buff->localroute = sk->localroute; + + t1 = (struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ + + rt=ip_rt_route(sk->daddr, NULL, NULL); + + + /* + * We need to build the routing stuff from the things saved in skb. + */ + + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + release_sock(sk); + return(-ENETUNREACH); + } + + buff->len += tmp; + t1 = (struct tcphdr *)((char *)t1 +tmp); + + memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1)); + t1->seq = ntohl(sk->write_seq++); + sk->sent_seq = sk->write_seq; + buff->h.seq = sk->write_seq; + t1->ack = 0; + t1->window = 2; + t1->res1=0; + t1->res2=0; + t1->rst = 0; + t1->urg = 0; + t1->psh = 0; + t1->syn = 1; + t1->urg_ptr = 0; + t1->doff = 6; + /* use 512 or whatever user asked for */ + + if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) + sk->window_clamp=rt->rt_window; + else + sk->window_clamp=0; + + if (sk->user_mss) + sk->mtu = sk->user_mss; + else if(rt!=NULL && (rt->rt_flags&RTF_MTU)) + sk->mtu = rt->rt_mss; + else + { +#ifdef CONFIG_INET_SNARL + if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr)) +#else + if ((sk->saddr ^ sk->daddr) & dev->pa_mask) +#endif + sk->mtu = 
576 - HEADER_SIZE; + else + sk->mtu = MAX_WINDOW; + } + /* + * but not bigger than device MTU + */ + + if(sk->mtu <32) + sk->mtu = 32; /* Sanity limit */ + + sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE); + + /* + * Put in the TCP options to say MTU. + */ + + ptr = (unsigned char *)(t1+1); + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = (sk->mtu) >> 8; + ptr[3] = (sk->mtu) & 0xff; + tcp_send_check(t1, sk->saddr, sk->daddr, + sizeof(struct tcphdr) + 4, sk); + + /* + * This must go first otherwise a really quick response will get reset. + */ + + tcp_set_state(sk,TCP_SYN_SENT); + sk->rto = TCP_TIMEOUT_INIT; +#if 0 /* we already did this */ + init_timer(&sk->retransmit_timer); +#endif + sk->retransmit_timer.function=&retransmit_timer; + sk->retransmit_timer.data = (unsigned long)sk; + reset_xmit_timer(sk, TIME_WRITE, sk->rto); /* Timer for repeating the SYN until an answer */ + sk->retransmits = TCP_SYN_RETRIES; + + sk->prot->queue_xmit(sk, dev, buff, 0); + reset_xmit_timer(sk, TIME_WRITE, sk->rto); + tcp_statistics.TcpActiveOpens++; + tcp_statistics.TcpOutSegs++; + + release_sock(sk); + return(0); +} + + +/* This functions checks to see if the tcp header is actually acceptable. */ +extern __inline__ int tcp_sequence(struct sock *sk, struct tcphdr *th, short len, + struct options *opt, unsigned long saddr, struct device *dev) +{ + unsigned long next_seq; + + next_seq = len - 4*th->doff; + if (th->fin) + next_seq++; + /* if we have a zero window, we can't have any data in the packet.. */ + if (next_seq && !sk->window) + goto ignore_it; + next_seq += th->seq; + + /* + * This isn't quite right. sk->acked_seq could be more recent + * than sk->window. This is however close enough. We will accept + * slightly more packets than we should, but it should not cause + * problems unless someone is trying to forge packets. + */ + + /* have we already seen all of this packet? */ + if (!after(next_seq+1, sk->acked_seq)) + goto ignore_it; + /* or does it start beyond the window? 
*/ + if (!before(th->seq, sk->acked_seq + sk->window + 1)) + goto ignore_it; + + /* ok, at least part of this packet would seem interesting.. */ + return 1; + +ignore_it: + if (th->rst) + return 0; + + /* + * Send a reset if we get something not ours and we are + * unsynchronized. Note: We don't do anything to our end. We + * are just killing the bogus remote connection then we will + * connect again and it will work (with luck). + */ + + if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) + { + tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl); + return 1; + } + + /* Try to resync things. */ + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); + return 0; +} + +/* + * When we get a reset we do this. + */ + +static int tcp_std_reset(struct sock *sk, struct sk_buff *skb) +{ + sk->zapped = 1; + sk->err = ECONNRESET; + if (sk->state == TCP_SYN_SENT) + sk->err = ECONNREFUSED; + if (sk->state == TCP_CLOSE_WAIT) + sk->err = EPIPE; +#ifdef TCP_DO_RFC1337 + /* + * Time wait assassination protection [RFC1337] + */ + if(sk->state!=TCP_TIME_WAIT) + { + tcp_set_state(sk,TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + } +#else + tcp_set_state(sk,TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; +#endif + if (!sk->dead) + sk->state_change(sk); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); +} + +/* + * A TCP packet has arrived. + */ + +int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, + unsigned long saddr, int redo, struct inet_protocol * protocol) +{ + struct tcphdr *th; + struct sock *sk; + int syn_ok=0; + + if (!skb) + { + printk("IMPOSSIBLE 1\n"); + return(0); + } + + if (!dev) + { + printk("IMPOSSIBLE 2\n"); + return(0); + } + + tcp_statistics.TcpInSegs++; + + if(skb->pkt_type!=PACKET_HOST) + { + kfree_skb(skb,FREE_READ); + return(0); + } + + th = skb->h.th; + + /* + * Find the socket. 
+ */ + + sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); + + /* + * If this socket has got a reset it's to all intents and purposes + * really dead. Count closed sockets as dead. + * + * Note: BSD appears to have a bug here. A 'closed' TCP in BSD + * simply drops data. This seems incorrect as a 'closed' TCP doesn't + * exist so should cause resets as if the port was unreachable. + */ + + if (sk!=NULL && (sk->zapped || sk->state==TCP_CLOSE)) + sk=NULL; + + if (!redo) + { + if (tcp_check(th, len, saddr, daddr )) + { + skb->sk = NULL; + kfree_skb(skb,FREE_READ); + /* + * We don't release the socket because it was + * never marked in use. + */ + return(0); + } + th->seq = ntohl(th->seq); + + /* See if we know about the socket. */ + if (sk == NULL) + { + /* + * No such TCB. If th->rst is 0 send a reset (checked in tcp_reset) + */ + tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + skb->sk = NULL; + /* + * Discard frame + */ + kfree_skb(skb, FREE_READ); + return(0); + } + + skb->len = len; + skb->acked = 0; + skb->used = 0; + skb->free = 0; + skb->saddr = daddr; + skb->daddr = saddr; + + /* We may need to add it to the backlog here. */ + cli(); + if (sk->inuse) + { + skb_queue_tail(&sk->back_log, skb); + sti(); + return(0); + } + sk->inuse = 1; + sti(); + } + else + { + if (sk==NULL) + { + tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + return(0); + } + } + + + if (!sk->prot) + { + printk("IMPOSSIBLE 3\n"); + return(0); + } + + + /* + * Charge the memory to the socket. + */ + + if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + skb->sk=sk; + sk->rmem_alloc += skb->mem_len; + + /* + * This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We + * don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug + * compatibility. 
We also set up variables more thoroughly [Karn notes in the + * KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths]. + */ + + if(sk->state!=TCP_ESTABLISHED) /* Skip this lot for normal flow */ + { + + /* + * Now deal with unusual cases. + */ + + if(sk->state==TCP_LISTEN) + { + if(th->ack) /* These use the socket TOS.. might want to be the received TOS */ + tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl); + + /* + * We don't care for RST, and non SYN are absorbed (old segments) + * Broadcast/multicast SYN isn't allowed. Note - bug if you change the + * netmask on a running connection it can go broadcast. Even Sun's have + * this problem so I'm ignoring it + */ + + if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + /* + * Guess we need to make a new socket up + */ + + tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq()); + + /* + * Now we have several options: In theory there is nothing else + * in the frame. KA9Q has an option to send data with the syn, + * BSD accepts data with the syn up to the [to be] advertised window + * and Solaris 2.1 gives you a protocol error. For now we just ignore + * it, that fits the spec precisely and avoids incompatibilities. It + * would be nice in future to drop through and process the data. + */ + + release_sock(sk); + return 0; + } + + /* retransmitted SYN? 
*/ + if (sk->state == TCP_SYN_RECV && th->syn && th->seq+1 == sk->acked_seq) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + /* + * SYN sent means we have to look for a suitable ack and either reset + * for bad matches or go to connected + */ + + if(sk->state==TCP_SYN_SENT) + { + /* Crossed SYN or previous junk segment */ + if(th->ack) + { + /* We got an ack, but it's not a good ack */ + if(!tcp_ack(sk,th,saddr,len)) + { + /* Reset the ack - its an ack from a + different connection [ th->rst is checked in tcp_reset()] */ + tcp_statistics.TcpAttemptFails++; + tcp_reset(daddr, saddr, th, + sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + if(th->rst) + return tcp_std_reset(sk,skb); + if(!th->syn) + { + /* A valid ack from a different connection + start. Shouldn't happen but cover it */ + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + /* + * Ok.. it's good. Set up sequence numbers and + * move to established. + */ + syn_ok=1; /* Don't reset this connection for the syn */ + sk->acked_seq=th->seq+1; + sk->fin_seq=th->seq; + tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr); + tcp_set_state(sk, TCP_ESTABLISHED); + tcp_options(sk,th); + sk->dummy_th.dest=th->source; + sk->copied_seq = sk->acked_seq; + if(!sk->dead) + { + sk->state_change(sk); + sock_wake_async(sk->socket, 0); + } + if(sk->max_window==0) + { + sk->max_window = 32; + sk->mss = min(sk->max_window, sk->mtu); + } + } + else + { + /* See if SYN's cross. Drop if boring */ + if(th->syn && !th->rst) + { + /* Crossed SYN's are fine - but talking to + yourself is right out... 
*/ + if(sk->saddr==saddr && sk->daddr==daddr && + sk->dummy_th.source==th->source && + sk->dummy_th.dest==th->dest) + { + tcp_statistics.TcpAttemptFails++; + return tcp_std_reset(sk,skb); + } + tcp_set_state(sk,TCP_SYN_RECV); + + /* + * FIXME: + * Must send SYN|ACK here + */ + } + /* Discard junk segment */ + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + /* + * SYN_RECV with data maybe.. drop through + */ + goto rfc_step6; + } + + /* + * BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is + * a more complex suggestion for fixing these reuse issues in RFC1644 + * but not yet ready for general use. Also see RFC1379. + */ + +#define BSD_TIME_WAIT +#ifdef BSD_TIME_WAIT + if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && + after(th->seq, sk->acked_seq) && !th->rst) + { + long seq=sk->write_seq; + if(sk->debug) + printk("Doing a BSD time wait\n"); + tcp_statistics.TcpEstabResets++; + sk->rmem_alloc -= skb->mem_len; + skb->sk = NULL; + sk->err=ECONNRESET; + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + release_sock(sk); + sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); + if (sk && sk->state==TCP_LISTEN) + { + sk->inuse=1; + skb->sk = sk; + sk->rmem_alloc += skb->mem_len; + tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); + release_sock(sk); + return 0; + } + kfree_skb(skb, FREE_READ); + return 0; + } +#endif + } + + /* + * We are now in normal data flow (see the step list in the RFC) + * Note most of these are inline now. I'll inline the lot when + * I have time to test it hard and look at what gcc outputs + */ + + if(!tcp_sequence(sk,th,len,opt,saddr,dev)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + if(th->rst) + return tcp_std_reset(sk,skb); + + /* + * !syn_ok is effectively the state test in RFC793. 
+ */ + + if(th->syn && !syn_ok) + { + tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255); + return tcp_std_reset(sk,skb); + } + + /* + * Process the ACK + */ + + + if(th->ack && !tcp_ack(sk,th,saddr,len)) + { + /* + * Our three way handshake failed. + */ + + if(sk->state==TCP_SYN_RECV) + { + tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); + } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + +rfc_step6: /* I'll clean this up later */ + + /* + * Process urgent data + */ + + if(tcp_urg(sk, th, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + + /* + * Process the encapsulated data + */ + + if(tcp_data(skb,sk, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + + /* + * And done + */ + + release_sock(sk); + return 0; +} + +/* + * This routine sends a packet with an out of date sequence + * number. It assumes the other end will try to ack it. + */ + +static void tcp_write_wakeup(struct sock *sk) +{ + struct sk_buff *buff; + struct tcphdr *t1; + struct device *dev=NULL; + int tmp; + + if (sk->zapped) + return; /* After a valid reset we can send no more */ + + /* + * Write data can still be transmitted/retransmitted in the + * following states. If any other state is encountered, return. + * [listen/close will never occur here anyway] + */ + + if (sk->state != TCP_ESTABLISHED && + sk->state != TCP_CLOSE_WAIT && + sk->state != TCP_FIN_WAIT1 && + sk->state != TCP_LAST_ACK && + sk->state != TCP_CLOSING + ) + { + return; + } + + buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); + if (buff == NULL) + return; + + buff->len = sizeof(struct tcphdr); + buff->free = 1; + buff->sk = sk; + buff->localroute = sk->localroute; + + t1 = (struct tcphdr *) buff->data; + + /* Put in the IP header and routing stuff. 
*/ + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); + if (tmp < 0) + { + sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); + return; + } + + buff->len += tmp; + t1 = (struct tcphdr *)((char *)t1 +tmp); + + memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); + + /* + * Use a previous sequence. + * This should cause the other end to send an ack. + */ + + t1->seq = htonl(sk->sent_seq-1); + t1->ack = 1; + t1->res1= 0; + t1->res2= 0; + t1->rst = 0; + t1->urg = 0; + t1->psh = 0; + t1->fin = 0; /* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */ + t1->syn = 0; + t1->ack_seq = ntohl(sk->acked_seq); + t1->window = ntohs(tcp_select_window(sk)); + t1->doff = sizeof(*t1)/4; + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); + /* + * Send it and free it. + * This will prevent the timer from automatically being restarted. + */ + sk->prot->queue_xmit(sk, dev, buff, 1); + tcp_statistics.TcpOutSegs++; +} + +/* + * A window probe timeout has occurred. + */ + +void tcp_send_probe0(struct sock *sk) +{ + if (sk->zapped) + return; /* After a valid reset we can send no more */ + + tcp_write_wakeup(sk); + + sk->backoff++; + sk->rto = min(sk->rto << 1, 120*HZ); + reset_xmit_timer (sk, TIME_PROBE0, sk->rto); + sk->retransmits++; + sk->prot->retransmits ++; +} + +/* + * Socket option code for TCP. + */ + +int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) +{ + int val,err; + + if(level!=SOL_TCP) + return ip_setsockopt(sk,level,optname,optval,optlen); + + if (optval == NULL) + return(-EINVAL); + + err=verify_area(VERIFY_READ, optval, sizeof(int)); + if(err) + return err; + + val = get_fs_long((unsigned long *)optval); + + switch(optname) + { + case TCP_MAXSEG: +/* + * values greater than interface MTU won't take effect. 
however at + * the point when this call is done we typically don't yet know + * which interface is going to be used + */ + if(val<1||val>MAX_WINDOW) + return -EINVAL; + sk->user_mss=val; + return 0; + case TCP_NODELAY: + sk->nonagle=(val==0)?0:1; + return 0; + default: + return(-ENOPROTOOPT); + } +} + +int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) +{ + int val,err; + + if(level!=SOL_TCP) + return ip_getsockopt(sk,level,optname,optval,optlen); + + switch(optname) + { + case TCP_MAXSEG: + val=sk->user_mss; + break; + case TCP_NODELAY: + val=sk->nonagle; + break; + default: + return(-ENOPROTOOPT); + } + err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); + if(err) + return err; + put_fs_long(sizeof(int),(unsigned long *) optlen); + + err=verify_area(VERIFY_WRITE, optval, sizeof(int)); + if(err) + return err; + put_fs_long(val,(unsigned long *)optval); + + return(0); +} + + +struct proto tcp_prot = { + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + tcp_close, + tcp_read, + tcp_write, + tcp_sendto, + tcp_recvfrom, + ip_build_header, + tcp_connect, + tcp_accept, + ip_queue_xmit, + tcp_retransmit, + tcp_write_wakeup, + tcp_read_wakeup, + tcp_rcv, + tcp_select, +#ifdef _HURD_ + NULL, +#else + tcp_ioctl, +#endif + NULL, + tcp_shutdown, + tcp_setsockopt, + tcp_getsockopt, + 128, + 0, + {NULL,}, + "TCP", + 0, 0 +}; + +/* + * This routine computes a TCP checksum. 
+ */ + +unsigned short tcp_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr) +{ + unsigned long sum; + + if (saddr == 0) saddr = ip_my_addr(); + +/* + * stupid, gcc complains when I use just one __asm__ block, + * something about too many reloads, but this is just two + * instructions longer than what I want + */ + __asm__(" + addl %%ecx, %%ebx + adcl %%edx, %%ebx + adcl $0, %%ebx + " + : "=b"(sum) + : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256) + : "bx", "cx", "dx" ); + __asm__(" + movl %%ecx, %%edx + cld + cmpl $32, %%ecx + jb 2f + shrl $5, %%ecx + clc +1: lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + lodsl + adcl %%eax, %%ebx + loop 1b + adcl $0, %%ebx + movl %%edx, %%ecx +2: andl $28, %%ecx + je 4f + shrl $2, %%ecx + clc +3: lodsl + adcl %%eax, %%ebx + loop 3b + adcl $0, %%ebx +4: movl $0, %%eax + testw $2, %%dx + je 5f + lodsw + addl %%eax, %%ebx + adcl $0, %%ebx + movw $0, %%ax +5: test $1, %%edx + je 6f + lodsb + addl %%eax, %%ebx + adcl $0, %%ebx +6: movl %%ebx, %%eax + shrl $16, %%eax + addw %%ax, %%bx + adcw $0, %%bx + " + : "=b"(sum) + : "0"(sum), "c"(len), "S"(th) + : "ax", "bx", "cx", "dx", "si" ); + + /* We only want the bottom 16 bits, but we never cleared the top 16. */ + + return((~sum) & 0xffff); +} + diff --git a/pfinet/linux-inet/tcp.h b/pfinet/linux-inet/tcp.h new file mode 100644 index 00000000..016fa6dd --- /dev/null +++ b/pfinet/linux-inet/tcp.h @@ -0,0 +1,142 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP module. + * + * Version: @(#)tcp.h 1.0.5 05/23/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _TCP_H +#define _TCP_H + +#include <linux/tcp.h> + +#define MAX_SYN_SIZE 44 + MAX_HEADER +#define MAX_FIN_SIZE 40 + MAX_HEADER +#define MAX_ACK_SIZE 40 + MAX_HEADER +#define MAX_RESET_SIZE 40 + MAX_HEADER +#define MAX_WINDOW 16384 +#define MIN_WINDOW 2048 +#define MAX_ACK_BACKLOG 2 +#define MIN_WRITE_SPACE 2048 +#define TCP_WINDOW_DIFF 2048 + +/* urg_data states */ +#define URG_VALID 0x0100 +#define URG_NOTYET 0x0200 +#define URG_READ 0x0400 + +#define TCP_RETR1 7 /* + * This is how many retries it does before it + * tries to figure out if the gateway is + * down. + */ + +#define TCP_RETR2 15 /* + * This should take at least + * 90 minutes to time out. + */ + +#define TCP_TIMEOUT_LEN (15*60*HZ) /* should be about 15 mins */ +#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to successfully + * close the socket, about 60 seconds */ +#define TCP_FIN_TIMEOUT (3*60*HZ) /* BSD style FIN_WAIT2 deadlock breaker */ +#define TCP_ACK_TIME (3*HZ) /* time to delay before sending an ACK */ +#define TCP_DONE_TIME 250 /* maximum time to wait before actually + * destroying a socket */ +#define TCP_WRITE_TIME 3000 /* initial time to wait for an ACK, + * after last transmit */ +#define TCP_TIMEOUT_INIT (3*HZ) /* RFC 1122 initial timeout value */ +#define TCP_SYN_RETRIES 5 /* number of times to retry opening a + * connection */ +#define TCP_PROBEWAIT_LEN 100 /* time to wait between probes when + * I've got something to write and + * there is no window */ + +#define TCP_NO_CHECK 0 /* turn to one if you want the default + * to be no checksum */ + + +/* + * TCP option + */ + +#define TCPOPT_NOP 1 /* Padding */ +#define TCPOPT_EOL 0 /* End of options */ +#define TCPOPT_MSS 2 
/* Segment size negotiating */ +/* + * We don't use these yet, but they are for PAWS and big windows + */ +#define TCPOPT_WINDOW 3 /* Window scaling */ +#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ + + +/* + * The next routines deal with comparing 32 bit unsigned ints + * and worry about wraparound (automatic with unsigned arithmetic). + */ + +extern __inline int before(unsigned long seq1, unsigned long seq2) +{ + return (long)(seq1-seq2) < 0; +} + +extern __inline int after(unsigned long seq1, unsigned long seq2) +{ + return (long)(seq1-seq2) > 0; +} + + +/* is s2<=s1<=s3 ? */ +extern __inline int between(unsigned long seq1, unsigned long seq2, unsigned long seq3) +{ + return (after(seq1+1, seq2) && before(seq1, seq3+1)); +} + + +/* + * List all states of a TCP socket that can be viewed as a "connected" + * state. This now includes TCP_SYN_RECV, although I am not yet fully + * convinced that this is the solution for the 'getpeername(2)' + * problem. Thanks to Stephen A. 
Wood <saw@cebaf.gov> -FvK + */ +extern __inline const int +tcp_connected(const int state) +{ + return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || + state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || + state == TCP_SYN_RECV); +} + + +extern struct proto tcp_prot; + + +extern void tcp_err(int err, unsigned char *header, unsigned long daddr, + unsigned long saddr, struct inet_protocol *protocol); +extern void tcp_shutdown (struct sock *sk, int how); +extern int tcp_rcv(struct sk_buff *skb, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, int redo, + struct inet_protocol *protocol); + +extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); + +extern int tcp_select_window(struct sock *sk); +extern void tcp_send_check(struct tcphdr *th, unsigned long saddr, + unsigned long daddr, int len, struct sock *sk); +extern void tcp_send_probe0(struct sock *sk); +extern void tcp_enqueue_partial(struct sk_buff *, struct sock *); +extern struct sk_buff * tcp_dequeue_partial(struct sock *); + + +#endif /* _TCP_H */ diff --git a/pfinet/linux-inet/timer.c b/pfinet/linux-inet/timer.c new file mode 100644 index 00000000..4fbbc74b --- /dev/null +++ b/pfinet/linux-inet/timer.c @@ -0,0 +1,264 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * TIMER - implementation of software timers for IP. + * + * Version: @(#)timer.c 1.0.7 05/25/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> + * Florian La Roche, <flla@stud.uni-sb.de> + * + * Fixes: + * Alan Cox : To avoid destroying a wait queue as we use it + * we defer destruction until the destroy timer goes + * off. 
+ *		Alan Cox	:	Destroy socket doesn't write a status value to the
+ *					socket buffer _AFTER_ freeing it! Also sock ensures
+ *					the socket will get removed BEFORE this is called
+ *					otherwise if the timer TIME_DESTROY occurs inside
+ *					of inet_bh() with this socket being handled it goes
+ *					BOOM! Have to stop timer going off if net_bh is
+ *					active or the destroy causes crashes.
+ *		Alan Cox	:	Cleaned up unused code.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <linux/interrupt.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include "ip.h"
+#include "protocol.h"
+#include "tcp.h"
+#include <linux/skbuff.h>
+#include "sock.h"
+#include "arp.h"
+
+void delete_timer (struct sock *t)	/* clear t->timeout and remove t->timer via del_timer() */
+{
+	unsigned long flags;
+
+	save_flags (flags);	/* both updates below sit between cli() and restore_flags() */
+	cli();
+
+	t->timeout = 0;
+	del_timer (&t->timer);
+
+	restore_flags (flags);
+}
+
+void reset_timer (struct sock *t, int timeout, unsigned long len)	/* re-arm t->timer: store 'timeout' in t->timeout and 'len' in t->timer.expires */
+{
+	delete_timer (t);	/* drop any timer that is still pending before re-adding it */
+	t->timeout = timeout;
+#if 1
+  /* FIXME: ??? */
+	if ((int) len < 0)	/* prevent close to infinite timers. THEY _DO_ */
+		len = 3;	/* happen (negative values ?) - don't ask me why ! -FB */
+#endif
+	t->timer.expires = len;
+	add_timer (&t->timer);
+}
+
+
+/*
+ *	Now we will only be called whenever we need to do
+ *	something, but we must be sure to process all of the
+ *	sockets that need it.
+ */ + +void net_timer (unsigned long data) +{ + struct sock *sk = (struct sock*)data; + int why = sk->timeout; + + /* + * only process if socket is not in use + */ + + cli(); + if (sk->inuse || in_bh) + { + sk->timer.expires = 10; + add_timer(&sk->timer); + sti(); + return; + } + + sk->inuse = 1; + sti(); + + /* Always see if we need to send an ack. */ + + if (sk->ack_backlog && !sk->zapped) + { + sk->prot->read_wakeup (sk); + if (! sk->dead) + sk->data_ready(sk,0); + } + + /* Now we need to figure out why the socket was on the timer. */ + + switch (why) + { + case TIME_DONE: + if (! sk->dead || sk->state != TCP_CLOSE) + { + printk ("non dead socket in time_done\n"); + release_sock (sk); + break; + } + destroy_sock (sk); + break; + + case TIME_DESTROY: + /* + * We've waited for a while for all the memory associated with + * the socket to be freed. + */ + if(sk->wmem_alloc!=0 || sk->rmem_alloc!=0) + { + sk->wmem_alloc++; /* So it DOESN'T go away */ + destroy_sock (sk); + sk->wmem_alloc--; /* Might now have hit 0 - fall through and do it again if so */ + sk->inuse = 0; /* This will be ok, the destroy won't totally work */ + } + if(sk->wmem_alloc==0 && sk->rmem_alloc==0) + destroy_sock(sk); /* Socket gone, DON'T update sk->inuse! */ + break; + case TIME_CLOSE: + /* We've waited long enough, close the socket. */ + sk->state = TCP_CLOSE; + delete_timer (sk); + /* Kill the ARP entry in case the hardware has changed. */ + arp_destroy (sk->daddr, 0); + if (!sk->dead) + sk->state_change(sk); + sk->shutdown = SHUTDOWN_MASK; + reset_timer (sk, TIME_DESTROY, TCP_DONE_TIME); + release_sock (sk); + break; +#if 0 + case TIME_PROBE0: + tcp_send_probe0(sk); + release_sock (sk); + break; + case TIME_WRITE: /* try to retransmit. */ + /* It could be we got here because we needed to send an ack. + * So we need to check for that. 
+ */ + { + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + skb = sk->send_head; + if (!skb) + { + restore_flags(flags); + } + else + { + if (jiffies < skb->when + sk->rto) + { + reset_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies); + restore_flags(flags); + release_sock (sk); + break; + } + restore_flags(flags); + /* printk("timer: seq %d retrans %d out %d cong %d\n", sk->send_head->h.seq, + sk->retransmits, sk->packets_out, sk->cong_window); */ + sk->prot->retransmit (sk, 0); + if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) + || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) + { + arp_destroy (sk->daddr, 0); + ip_route_check (sk->daddr); + } + if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) + { + sk->err = ETIMEDOUT; + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) + { + sk->state = TCP_TIME_WAIT; + reset_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + } + else + { + sk->prot->close (sk, 1); + break; + } + } + } + release_sock (sk); + break; + } + case TIME_KEEPOPEN: + /* + * this reset_timer() call is a hack, this is not + * how KEEPOPEN is supposed to work. + */ + reset_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + + /* Send something to keep the connection open. 
*/ + if (sk->prot->write_wakeup) + sk->prot->write_wakeup (sk); + sk->retransmits++; + if (sk->shutdown == SHUTDOWN_MASK) + { + sk->prot->close (sk, 1); + sk->state = TCP_CLOSE; + } + if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) + || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) + { + arp_destroy (sk->daddr, 0); + ip_route_check (sk->daddr); + release_sock (sk); + break; + } + if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) + { + arp_destroy (sk->daddr, 0); + sk->err = ETIMEDOUT; + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) + { + sk->state = TCP_TIME_WAIT; + if (!sk->dead) + sk->state_change(sk); + release_sock (sk); + } + else + { + sk->prot->close (sk, 1); + } + break; + } + release_sock (sk); + break; +#endif + default: + printk ("net_timer: timer expired - reason %d is unknown\n", why); + release_sock (sk); + break; + } +} + diff --git a/pfinet/linux-inet/udp.c b/pfinet/linux-inet/udp.c new file mode 100644 index 00000000..420f24b3 --- /dev/null +++ b/pfinet/linux-inet/udp.c @@ -0,0 +1,740 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The User Datagram Protocol (UDP). + * + * Version: @(#)udp.c 1.0.13 06/02/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : verify_area() calls + * Alan Cox : stopped close while in use off icmp + * messages. Not a fix but a botch that + * for udp at least is 'valid'. + * Alan Cox : Fixed icmp handling properly + * Alan Cox : Correct error for oversized datagrams + * Alan Cox : Tidied select() semantics. 
+ * Alan Cox : udp_err() fixed properly, also now + * select and read wake correctly on errors + * Alan Cox : udp_send verify_area moved to avoid mem leak + * Alan Cox : UDP can count its memory + * Alan Cox : send to an unknown connection causes + * an ECONNREFUSED off the icmp, but + * does NOT close. + * Alan Cox : Switched to new sk_buff handlers. No more backlog! + * Alan Cox : Using generic datagram code. Even smaller and the PEEK + * bug no longer crashes it. + * Fred Van Kempen : Net2e support for sk->broadcast. + * Alan Cox : Uses skb_free_datagram + * Alan Cox : Added get/set sockopt support. + * Alan Cox : Broadcasting without option set returns EACCES. + * Alan Cox : No wakeup calls. Instead we now use the callbacks. + * Alan Cox : Use ip_tos and ip_ttl + * Alan Cox : SNMP Mibs + * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. + * Matt Dillon : UDP length checks. + * Alan Cox : Smarter af_inet used properly. + * Alan Cox : Use new kernel side addressing. + * Alan Cox : Incorrect return on truncated datagram receive. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/termios.h>
+#include <linux/mm.h>
+#include <linux/config.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include "snmp.h"
+#include "ip.h"
+#include "protocol.h"
+#include "tcp.h"
+#include <linux/skbuff.h>
+#include "sock.h"
+#include "udp.h"
+#include "icmp.h"
+#include "route.h"
+
+/*
+ *	SNMP MIB for the UDP layer
+ */
+
+struct udp_mib		udp_statistics;
+
+
+static int udp_deliver(struct sock *sk, struct udphdr *uh, struct sk_buff *skb, struct device *dev, long saddr, long daddr, int len);
+
+#define min(a,b)	((a)<(b)?(a):(b))
+
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.
+ * Header points to the ip header of the error packet. We move
+ * on past this. Then (as it used to claim before adjustment)
+ * header points to the first 8 bytes of the udp header.  We need
+ * to find the appropriate port.  The 'protocol' argument is not used here.
+ */
+
+void udp_err(int err, unsigned char *header, unsigned long daddr,
+	unsigned long saddr, struct inet_protocol *protocol)
+{
+	struct udphdr *th;
+	struct sock *sk;
+	struct iphdr *ip=(struct iphdr *)header;
+
+	header += 4*ip->ihl;	/* ihl counts 32 bit words, so this steps over the IP header */
+
+	/*
+	 *	Find the 8 bytes of post IP header ICMP included for us
+	 */
+
+	th = (struct udphdr *)header;
+
+	sk = get_sock(&udp_prot, th->source, daddr, th->dest, saddr);
+
+	if (sk == NULL)
+	  	return;	/* No socket for error */
+
+	if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8))	/* mask the type first: '==' binds tighter than '&' */
+	{	/* Slow down! */
+		if (sk->cong_window > 1)
+			sk->cong_window = sk->cong_window/2;
+		return;
+	}
+
+	/*
+	 *	Various people wanted BSD UDP semantics. Well they've come
+	 *	back out because they slow down response to stuff like dead
+	 *	or unreachable name servers and they screw term users something
+	 *	chronic. Oh and it violates RFC1122. So basically fix your
+	 *	client code people.
+	 */
+
+#ifdef CONFIG_I_AM_A_BROKEN_BSD_WEENIE
+	/*
+	 *	It's only fatal if we have connected to them. I'm not happy
+	 *	with this code. Some BSD comparisons need doing.
+	 */
+
+	if (icmp_err_convert[err & 0xff].fatal && sk->state == TCP_ESTABLISHED)
+	{
+		sk->err = icmp_err_convert[err & 0xff].error;	/* the member is 'error', same as the #else branch */
+		sk->error_report(sk);
+	}
+#else
+	if (icmp_err_convert[err & 0xff].fatal)
+	{
+		sk->err = icmp_err_convert[err & 0xff].error;
+		sk->error_report(sk);
+	}
+#endif
+}
+
+
+
+/*
+ *	Generate UDP checksums. These may be disabled, eg for fast NFS over ethernet
+ *	We default them enabled.. if you turn them off you either know what you are
+ *	doing or get burned...
+ */
+
+static void udp_send_check(struct udphdr *uh, unsigned long saddr,
+		unsigned long daddr, int len, struct sock *sk)
+{
+	uh->check = 0;
+	if (sk && sk->no_check)
+	  	return;
+	uh->check = udp_check(uh, len, saddr, daddr);
+
+	/*
+	 *	FFFF and 0 are the same, pick the right one as 0 in the
+	 *	actual field means no checksum.
+	 */
+
+	if (uh->check == 0)
+		uh->check = 0xffff;
+}
+
+
+static int udp_send(struct sock *sk, struct sockaddr_in *sin,
+	 unsigned char *from, int len, int rt)
+{
+	struct sk_buff *skb;
+	struct device *dev;
+	struct udphdr *uh;
+	unsigned char *buff;
+	unsigned long saddr;
+	int size, tmp;
+	int ttl;
+
+	/*
+	 *	Allocate an sk_buff copy of the packet.
+	 */
+
+	size = sk->prot->max_header + len;
+	skb = sock_alloc_send_skb(sk, size, 0, &tmp);
+
+
+	if (skb == NULL)
+		return tmp;
+
+	skb->sk       = NULL;	/* to avoid changing sk->saddr */
+	skb->free     = 1;
+	skb->localroute = sk->localroute|(rt&MSG_DONTROUTE);
+
+	/*
+	 *	Now build the IP and MAC header.
+	 */
+
+	buff = skb->data;
+	saddr = sk->saddr;
+	dev = NULL;
+	ttl = sk->ip_ttl;
+#ifdef CONFIG_IP_MULTICAST
+	if (MULTICAST(sin->sin_addr.s_addr))
+		ttl = sk->ip_mc_ttl;
+#endif
+	tmp = sk->prot->build_header(skb, saddr, sin->sin_addr.s_addr,
+			&dev, IPPROTO_UDP, sk->opt, skb->mem_len,sk->ip_tos,ttl);
+
+	skb->sk=sk;	/* So memory is freed correctly */
+
+	/*
+	 *	Unable to put a header on the packet.
+	 */
+
+	if (tmp < 0 )
+	{
+		sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
+		return(tmp);
+	}
+
+	buff += tmp;
+	saddr = skb->saddr; /*dev->pa_addr;*/
+	skb->len = tmp + sizeof(struct udphdr) + len;	/* len + UDP + IP + MAC */
+	skb->dev = dev;
+
+	/*
+	 *	Fill in the UDP header.
+	 */
+
+	uh = (struct udphdr *) buff;
+	uh->len = htons(len + sizeof(struct udphdr));
+	uh->source = sk->dummy_th.source;
+	uh->dest = sin->sin_port;
+	buff = (unsigned char *) (uh + 1);
+
+	/*
+	 *	Copy the user data.
+	 */
+
+	memcpy_fromfs(buff, from, len);
+
+	/*
+	 *	Set up the UDP checksum.
+	 */
+
+	udp_send_check(uh, saddr, sin->sin_addr.s_addr, skb->len - tmp, sk);
+
+	/*
+	 *	Send the datagram to the interface.
+	 */
+
+	udp_statistics.UdpOutDatagrams++;
+
+	sk->prot->queue_xmit(sk, dev, skb, 1);
+	return(len);
+}
+
+
+static int udp_sendto(struct sock *sk, unsigned char *from, int len, int noblock,
+	   unsigned flags, struct sockaddr_in *usin, int addr_len)
+{
+	struct sockaddr_in sin;
+	int tmp;
+
+	/*
+	 *	Check the flags. We support no flags for UDP sending
+	 */
+	if (flags&~MSG_DONTROUTE)
+	  	return(-EINVAL);
+	/*
+	 *	Get and verify the address.
+	 */
+
+	if (usin)
+	{
+		if (addr_len < sizeof(sin))	/* NOTE(review): int vs. sizeof compares as unsigned, so a negative addr_len passes -- confirm callers never pass one */
+			return(-EINVAL);
+		memcpy(&sin,usin,sizeof(sin));
+		if (sin.sin_family && sin.sin_family != AF_INET)
+			return(-EINVAL);
+		if (sin.sin_port == 0)
+			return(-EINVAL);
+	}
+	else
+	{
+		if (sk->state != TCP_ESTABLISHED)
+			return(-EINVAL);
+		sin.sin_family = AF_INET;
+		sin.sin_port = sk->dummy_th.dest;
+		sin.sin_addr.s_addr = sk->daddr;
+	}
+
+	/*
+	 *	BSD socket semantics. You must set SO_BROADCAST to permit
+	 *	broadcasting of data.
+	 */
+
+	if(sin.sin_addr.s_addr==INADDR_ANY)
+		sin.sin_addr.s_addr=ip_my_addr();
+
+	if(!sk->broadcast && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST)
+	    	return -EACCES;			/* Must turn broadcast on first */
+
+	sk->inuse = 1;
+
+	/* Send the packet. */
+	tmp = udp_send(sk, &sin, from, len, flags);
+
+	/* The datagram has been sent off.  Release the socket. */
+	release_sock(sk);
+	return(tmp);
+}
+
+/*
+ *	In BSD SOCK_DGRAM a write is just like a send.
+ */
+
+static int udp_write(struct sock *sk, unsigned char *buff, int len, int noblock,
+	  unsigned flags)
+{
+	return(udp_sendto(sk, buff, len, noblock, flags, NULL, 0));
+}
+
+
+#ifndef _HURD_
+/*
+ *	IOCTL requests applicable to the UDP protocol
+ */
+
+int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	int err;
+	switch(cmd)
+	{
+		case TIOCOUTQ:
+		{
+			unsigned long amount;
+
+			if (sk->state == TCP_LISTEN) return(-EINVAL);
+			amount = sk->prot->wspace(sk)/*/2*/;
+			err=verify_area(VERIFY_WRITE,(void *)arg,
+					sizeof(unsigned long));
+			if(err)
+				return(err);
+			put_fs_long(amount,(unsigned long *)arg);
+			return(0);
+		}
+
+		case TIOCINQ:
+		{
+			struct sk_buff *skb;
+			unsigned long amount;
+
+			if (sk->state == TCP_LISTEN) return(-EINVAL);
+			amount = 0;
+			skb = skb_peek(&sk->receive_queue);
+			if (skb != NULL) {
+				/*
+				 * We will only return the amount
+				 * of this packet since that is all
+				 * that will be read.
+				 */
+				amount = skb->len;
+			}
+			err=verify_area(VERIFY_WRITE,(void *)arg,
+						sizeof(unsigned long));
+			if(err)
+				return(err);
+			put_fs_long(amount,(unsigned long *)arg);
+			return(0);
+		}
+
+		default:
+			return(-EINVAL);
+	}
+	return(0);
+}
+#endif
+
+/*
+ * 	This should be easy, if there is something there we
+ * 	return it, otherwise we block.
+ */ + +int udp_recvfrom(struct sock *sk, unsigned char *to, int len, + int noblock, unsigned flags, struct sockaddr_in *sin, + int *addr_len) +{ + int copied = 0; + int truesize; + struct sk_buff *skb; + int er; + + /* + * Check any passed addresses + */ + + if (addr_len) + *addr_len=sizeof(*sin); + + /* + * From here the generic datagram does a lot of the work. Come + * the finished NET3, it will do _ALL_ the work! + */ + + skb=skb_recv_datagram(sk,flags,noblock,&er); + if(skb==NULL) + return er; + + truesize = skb->len; + copied = min(len, truesize); + + /* + * FIXME : should use udp header size info value + */ + + skb_copy_datagram(skb,sizeof(struct udphdr),to,copied); + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_port = skb->h.uh->source; + sin->sin_addr.s_addr = skb->daddr; + } + + skb_free_datagram(skb); + release_sock(sk); + return(truesize); +} + +/* + * Read has the same semantics as recv in SOCK_DGRAM + */ + +int udp_read(struct sock *sk, unsigned char *buff, int len, int noblock, + unsigned flags) +{ + return(udp_recvfrom(sk, buff, len, noblock, flags, NULL, NULL)); +} + + +int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) +{ + struct rtable *rt; + unsigned long sa; + if (addr_len < sizeof(*usin)) + return(-EINVAL); + + if (usin->sin_family && usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + if (usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); + + if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST) + return -EACCES; /* Must turn broadcast on first */ + + rt=ip_rt_route(usin->sin_addr.s_addr, NULL, &sa); + if(rt==NULL) + return -ENETUNREACH; + sk->saddr = sa; /* Update source address */ + sk->daddr = usin->sin_addr.s_addr; + sk->dummy_th.dest = usin->sin_port; + sk->state = TCP_ESTABLISHED; + return(0); +} + + +static void udp_close(struct sock *sk, int timeout) +{ + sk->inuse = 1; + sk->state = TCP_CLOSE; + if (sk->dead) 
+ destroy_sock(sk); + else + release_sock(sk); +} + + +/* + * All we need to do is get the socket, and then do a checksum. + */ + +int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + unsigned long daddr, unsigned short len, + unsigned long saddr, int redo, struct inet_protocol *protocol) +{ + struct sock *sk; + struct udphdr *uh; + unsigned short ulen; + int addr_type = IS_MYADDR; + + if(!dev || dev->pa_addr!=daddr) + addr_type=ip_chk_addr(daddr); + + /* + * Get the header. + */ + uh = (struct udphdr *) skb->h.uh; + + ip_statistics.IpInDelivers++; + + /* + * Validate the packet and the UDP length. + */ + + ulen = ntohs(uh->len); + + if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) + { + printk("UDP: short packet: %d/%d\n", ulen, len); + udp_statistics.UdpInErrors++; + kfree_skb(skb, FREE_WRITE); + return(0); + } + + if (uh->check && udp_check(uh, len, saddr, daddr)) + { + /* <mea@utu.fi> wants to know, who sent it, to + go and stomp on the garbage sender... */ + printk("UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n", + ntohl(saddr),ntohs(uh->source), + ntohl(daddr),ntohs(uh->dest), + ulen); + udp_statistics.UdpInErrors++; + kfree_skb(skb, FREE_WRITE); + return(0); + } + + + len=ulen; + +#ifdef CONFIG_IP_MULTICAST + if (addr_type!=IS_MYADDR) + { + /* + * Multicasts and broadcasts go to each listener. 
+ */ + struct sock *sknext=NULL; + sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest, + saddr, uh->source, daddr); + if(sk) + { + do + { + struct sk_buff *skb1; + + sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr); + if(sknext) + skb1=skb_clone(skb,GFP_ATOMIC); + else + skb1=skb; + if(skb1) + udp_deliver(sk, uh, skb1, dev,saddr,daddr,len); + sk=sknext; + } + while(sknext!=NULL); + } + else + kfree_skb(skb, FREE_READ); + return 0; + } +#endif + sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr); + if (sk == NULL) + { + udp_statistics.UdpNoPorts++; + if (addr_type == IS_MYADDR) + { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); + } + /* + * Hmm. We got an UDP broadcast to a port to which we + * don't wanna listen. Ignore it. + */ + skb->sk = NULL; + kfree_skb(skb, FREE_WRITE); + return(0); + } + + return udp_deliver(sk,uh,skb,dev, saddr, daddr, len); +} + +static int udp_deliver(struct sock *sk, struct udphdr *uh, struct sk_buff *skb, struct device *dev, long saddr, long daddr, int len) +{ + skb->sk = sk; + skb->dev = dev; + skb->len = len; + + /* + * These are supposed to be switched. + */ + + skb->daddr = saddr; + skb->saddr = daddr; + + + /* + * Charge it to the socket, dropping if the queue is full. 
+ */ + + skb->len = len - sizeof(*uh); + + if (sock_queue_rcv_skb(sk,skb)<0) + { + udp_statistics.UdpInErrors++; + ip_statistics.IpInDiscards++; + ip_statistics.IpInDelivers--; + skb->sk = NULL; + kfree_skb(skb, FREE_WRITE); + release_sock(sk); + return(0); + } + udp_statistics.UdpInDatagrams++; + release_sock(sk); + return(0); +} + + +struct proto udp_prot = { + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + udp_close, + udp_read, + udp_write, + udp_sendto, + udp_recvfrom, + ip_build_header, + udp_connect, + NULL, + ip_queue_xmit, + NULL, + NULL, + NULL, + udp_rcv, + datagram_select, +#ifdef _HURD_ + NULL, +#else + udp_ioctl, +#endif + NULL, + NULL, + ip_setsockopt, + ip_getsockopt, + 128, + 0, + {NULL,}, + "UDP", + 0, 0 +}; + +static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr) +{ + unsigned long sum; + + __asm__( "\t addl %%ecx,%%ebx\n" + "\t adcl %%edx,%%ebx\n" + "\t adcl $0, %%ebx\n" + : "=b"(sum) + : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_UDP*256) + : "cx","bx","dx" ); + + if (len > 3) + { + __asm__("\tclc\n" + "1:\n" + "\t lodsl\n" + "\t adcl %%eax, %%ebx\n" + "\t loop 1b\n" + "\t adcl $0, %%ebx\n" + : "=b"(sum) , "=S"(uh) + : "0"(sum), "c"(len/4) ,"1"(uh) + : "ax", "cx", "bx", "si" ); + } + + /* + * Convert from 32 bits to 16 bits. + */ + + __asm__("\t movl %%ebx, %%ecx\n" + "\t shrl $16,%%ecx\n" + "\t addw %%cx, %%bx\n" + "\t adcw $0, %%bx\n" + : "=b"(sum) + : "0"(sum) + : "bx", "cx"); + + /* + * Check for an extra word. + */ + + if ((len & 2) != 0) + { + __asm__("\t lodsw\n" + "\t addw %%ax,%%bx\n" + "\t adcw $0, %%bx\n" + : "=b"(sum), "=S"(uh) + : "0"(sum) ,"1"(uh) + : "si", "ax", "bx"); + } + + /* + * Now check for the extra byte. 
+ */ + + if ((len & 1) != 0) + { + __asm__("\t lodsb\n" + "\t movb $0,%%ah\n" + "\t addw %%ax,%%bx\n" + "\t adcw $0, %%bx\n" + : "=b"(sum) + : "0"(sum) ,"S"(uh) + : "si", "ax", "bx"); + } + + /* + * We only want the bottom 16 bits, but we never cleared the top 16. + */ + + return((~sum) & 0xffff); +} diff --git a/pfinet/linux-inet/udp.h b/pfinet/linux-inet/udp.h new file mode 100644 index 00000000..6bfbb3cb --- /dev/null +++ b/pfinet/linux-inet/udp.h @@ -0,0 +1,50 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the UDP module. + * + * Version: @(#)udp.h 1.0.2 05/07/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : Turned on udp checksums. I don't want to + * chase 'memory corruption' bugs that aren't! + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _UDP_H +#define _UDP_H + +#include <linux/udp.h> + + +#define UDP_NO_CHECK 0 + + +extern struct proto udp_prot; + + +extern void udp_err(int err, unsigned char *header, unsigned long daddr, + unsigned long saddr, struct inet_protocol *protocol); +extern int udp_recvfrom(struct sock *sk, unsigned char *to, + int len, int noblock, unsigned flags, + struct sockaddr_in *sin, int *addr_len); +extern int udp_read(struct sock *sk, unsigned char *buff, + int len, int noblock, unsigned flags); +extern int udp_connect(struct sock *sk, + struct sockaddr_in *usin, int addr_len); +extern int udp_rcv(struct sk_buff *skb, struct device *dev, + struct options *opt, unsigned long daddr, + unsigned short len, unsigned long saddr, int redo, + struct inet_protocol *protocol); +extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); + + +#endif /* _UDP_H */ diff --git a/pfinet/linux-inet/utils.c b/pfinet/linux-inet/utils.c new file mode 100644 index 00000000..60bbb9f8 --- /dev/null +++ b/pfinet/linux-inet/utils.c @@ -0,0 +1,91 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Various kernel-resident INET utility functions; mainly + * for format conversion and debugging output. + * + * Version: @(#)utils.c 1.0.7 05/18/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Fixes: + * Alan Cox : verify_area check. + * Alan Cox : removed old debugging. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <stdarg.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include "ip.h"
+#include "protocol.h"
+#include "tcp.h"
+#include <linux/skbuff.h>
+
+
+/*
+ *	Display an IP address in readable format.  Returns a pointer to a static buffer that the next call overwrites.
+ */
+
+char *in_ntoa(unsigned long in)
+{
+	static char buff[18];
+	char *p;
+
+	p = (char *) &in;	/* read the first four bytes of 'in' in memory order */
+	sprintf(buff, "%d.%d.%d.%d",
+		(p[0] & 255), (p[1] & 255), (p[2] & 255), (p[3] & 255));
+	return(buff);
+}
+
+
+/*
+ *	Convert an ASCII string to binary IP.  The result is passed through htonl(); characters are not validated as digits.
+ */
+
+unsigned long in_aton(char *str)
+{
+	unsigned long l;
+	unsigned int val;
+	int i;
+
+	l = 0;
+	for (i = 0; i < 4; i++)	/* one pass per dotted component; a missing component contributes 0 */
+	{
+		l <<= 8;
+		if (*str != '\0')
+		{
+			val = 0;
+			while (*str != '\0' && *str != '.')
+			{
+				val *= 10;
+				val += *str - '0';
+				str++;
+			}
+			l |= val;
+			if (*str != '\0')
+				str++;	/* step over the '.' separator */
+		}
+	}
+	return(htonl(l));
+}
+
diff --git a/pfinet/linux/autoconf.h b/pfinet/linux/autoconf.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/pfinet/linux/autoconf.h
diff --git a/pfinet/linux/config.h b/pfinet/linux/config.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/pfinet/linux/config.h
diff --git a/pfinet/linux/errno.h b/pfinet/linux/errno.h
new file mode 100644
index 00000000..7dab9bab
--- /dev/null
+++ b/pfinet/linux/errno.h
@@ -0,0 +1,8 @@
+#ifndef _HACK_ERRNO_H
+#define _HACK_ERRNO_H
+
+#include <errno.h>
+
+#define ERESTARTSYS EINTR
+
+#endif
diff --git a/pfinet/linux/etherdevice.h b/pfinet/linux/etherdevice.h
new file mode 100644
index 00000000..41073fcb
--- /dev/null
+++ b/pfinet/linux/etherdevice.h
@@ -0,0 +1,41 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.
NET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the Ethernet handlers. + * + * Version: @(#)eth.h 1.0.4 05/13/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * Relocated to include/linux where it belongs by Alan Cox + * <gw4pts@gw4pts.ampr.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * WARNING: This move may well be temporary. This file will get merged with others RSN. + * + */ +#ifndef _LINUX_ETHERDEVICE_H +#define _LINUX_ETHERDEVICE_H + + +#include <linux/if_ether.h> + +#ifdef __KERNEL__ +extern int eth_header(unsigned char *buff, struct device *dev, + unsigned short type, void *daddr, + void *saddr, unsigned len, + struct sk_buff *skb); +extern int eth_rebuild_header(void *buff, struct device *dev, + unsigned long raddr, struct sk_buff *skb); +extern unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev); + +#endif + +#endif /* _LINUX_ETHERDEVICE_H */ diff --git a/pfinet/linux/fcntl.h b/pfinet/linux/fcntl.h new file mode 100644 index 00000000..cd304557 --- /dev/null +++ b/pfinet/linux/fcntl.h @@ -0,0 +1 @@ +#include <fcntl.h> diff --git a/pfinet/linux/icmp.h b/pfinet/linux/icmp.h new file mode 100644 index 00000000..334c756d --- /dev/null +++ b/pfinet/linux/icmp.h @@ -0,0 +1,81 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the ICMP protocol. + * + * Version: @(#)icmp.h 1.0.3 04/28/93 + * + * Author: Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_ICMP_H +#define _LINUX_ICMP_H + +#define ICMP_ECHOREPLY 0 /* Echo Reply */ +#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ +#define ICMP_SOURCE_QUENCH 4 /* Source Quench */ +#define ICMP_REDIRECT 5 /* Redirect (change route) */ +#define ICMP_ECHO 8 /* Echo Request */ +#define ICMP_TIME_EXCEEDED 11 /* Time Exceeded */ +#define ICMP_PARAMETERPROB 12 /* Parameter Problem */ +#define ICMP_TIMESTAMP 13 /* Timestamp Request */ +#define ICMP_TIMESTAMPREPLY 14 /* Timestamp Reply */ +#define ICMP_INFO_REQUEST 15 /* Information Request */ +#define ICMP_INFO_REPLY 16 /* Information Reply */ +#define ICMP_ADDRESS 17 /* Address Mask Request */ +#define ICMP_ADDRESSREPLY 18 /* Address Mask Reply */ + + +/* Codes for UNREACH. */ +#define ICMP_NET_UNREACH 0 /* Network Unreachable */ +#define ICMP_HOST_UNREACH 1 /* Host Unreachable */ +#define ICMP_PROT_UNREACH 2 /* Protocol Unreachable */ +#define ICMP_PORT_UNREACH 3 /* Port Unreachable */ +#define ICMP_FRAG_NEEDED 4 /* Fragmentation Needed/DF set */ +#define ICMP_SR_FAILED 5 /* Source Route failed */ +#define ICMP_NET_UNKNOWN 6 +#define ICMP_HOST_UNKNOWN 7 +#define ICMP_HOST_ISOLATED 8 +#define ICMP_NET_ANO 9 +#define ICMP_HOST_ANO 10 +#define ICMP_NET_UNR_TOS 11 +#define ICMP_HOST_UNR_TOS 12 + +/* Codes for REDIRECT. */ +#define ICMP_REDIR_NET 0 /* Redirect Net */ +#define ICMP_REDIR_HOST 1 /* Redirect Host */ +#define ICMP_REDIR_NETTOS 2 /* Redirect Net for TOS */ +#define ICMP_REDIR_HOSTTOS 3 /* Redirect Host for TOS */ + +/* Codes for TIME_EXCEEDED. 
*/ +#define ICMP_EXC_TTL 0 /* TTL count exceeded */ +#define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */ + + +struct icmphdr { + unsigned char type; + unsigned char code; + unsigned short checksum; + union { + struct { + unsigned short id; + unsigned short sequence; + } echo; + unsigned long gateway; + } un; +}; + + +struct icmp_err { + int error; + unsigned fatal:1; +}; + + +#endif /* _LINUX_ICMP_H */ diff --git a/pfinet/linux/if.h b/pfinet/linux/if.h new file mode 100644 index 00000000..cb6b4e05 --- /dev/null +++ b/pfinet/linux/if.h @@ -0,0 +1,156 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the INET interface module. + * + * Version: @(#)if.h 1.0.2 04/18/93 + * + * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988 + * Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_IF_H +#define _LINUX_IF_H + +#include <linux/types.h> /* for "caddr_t" et al */ +#include <linux/socket.h> /* for "struct sockaddr" et al */ + +/* Standard interface flags. 
*/ +#define IFF_UP 0x1 /* interface is up */ +#define IFF_BROADCAST 0x2 /* broadcast address valid */ +#define IFF_DEBUG 0x4 /* turn on debugging */ +#define IFF_LOOPBACK 0x8 /* is a loopback net */ +#define IFF_POINTOPOINT 0x10 /* interface has p-p link */ +#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */ +#define IFF_RUNNING 0x40 /* resources allocated */ +#define IFF_NOARP 0x80 /* no ARP protocol */ +#define IFF_PROMISC 0x100 /* receive all packets */ +/* Not supported */ +#define IFF_ALLMULTI 0x200 /* receive all multicast packets*/ + +#define IFF_MASTER 0x400 /* master of a load balancer */ +#define IFF_SLAVE 0x800 /* slave of a load balancer */ + +#define IFF_MULTICAST 0x1000 /* Supports multicast */ + +/* + * The ifaddr structure contains information about one address + * of an interface. They are maintained by the different address + * families, are allocated and attached when an address is set, + * and are linked together so all addresses for an interface can + * be located. + */ + +struct ifaddr +{ + struct sockaddr ifa_addr; /* address of interface */ + union { + struct sockaddr ifu_broadaddr; + struct sockaddr ifu_dstaddr; + } ifa_ifu; + struct iface *ifa_ifp; /* back-pointer to interface */ + struct ifaddr *ifa_next; /* next address for interface */ +}; + +#define ifa_broadaddr ifa_ifu.ifu_broadaddr /* broadcast address */ +#define ifa_dstaddr ifa_ifu.ifu_dstaddr /* other end of link */ + +/* + * Device mapping structure. I'd just gone off and designed a + * beautiful scheme using only loadable modules with arguments + * for driver options and along come the PCMCIA people 8) + * + * Ah well. The get() side of this is good for WDSETUP, and it'll + * be handy for debugging things. The set side is fine for now and + * being very small might be worth keeping for clean configuration. 
+ */ + +struct ifmap +{ + unsigned long mem_start; + unsigned long mem_end; + unsigned short base_addr; + unsigned char irq; + unsigned char dma; + unsigned char port; + /* 3 bytes spare */ +}; + +/* + * Interface request structure used for socket + * ioctl's. All interface ioctl's must have parameter + * definitions which begin with ifr_name. The + * remainder may be interface specific. + */ + +struct ifreq +{ +#define IFHWADDRLEN 6 +#define IFNAMSIZ 16 + union + { + char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + char ifrn_hwaddr[IFHWADDRLEN]; /* Obsolete */ + } ifr_ifrn; + + union { + struct sockaddr ifru_addr; + struct sockaddr ifru_dstaddr; + struct sockaddr ifru_broadaddr; + struct sockaddr ifru_netmask; + struct sockaddr ifru_hwaddr; + short ifru_flags; + int ifru_metric; + int ifru_mtu; + struct ifmap ifru_map; + char ifru_slave[IFNAMSIZ]; /* Just fits the size */ + caddr_t ifru_data; + } ifr_ifru; +}; + +#define ifr_name ifr_ifrn.ifrn_name /* interface name */ +#define old_ifr_hwaddr ifr_ifrn.ifrn_hwaddr /* interface hardware */ +#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ +#define ifr_addr ifr_ifru.ifru_addr /* address */ +#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */ +#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ +#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */ +#define ifr_flags ifr_ifru.ifru_flags /* flags */ +#define ifr_metric ifr_ifru.ifru_metric /* metric */ +#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ +#define ifr_map ifr_ifru.ifru_map /* device map */ +#define ifr_slave ifr_ifru.ifru_slave /* slave device */ +#define ifr_data ifr_ifru.ifru_data /* for use by interface */ + +/* + * Structure used in SIOCGIFCONF request. + * Used to retrieve interface configuration + * for machine (useful for programs which + * must know all networks accessible). 
+ */ + +struct ifconf +{ + int ifc_len; /* size of buffer */ + union + { + caddr_t ifcu_buf; + struct ifreq *ifcu_req; + } ifc_ifcu; +}; +#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ +#define ifc_req ifc_ifcu.ifcu_req /* array of structures */ + + +/* BSD UNIX expects to find these here, so here we go: */ +#include <linux/if_arp.h> +#include <linux/route.h> + +#endif /* _LINUX_IF_H */ diff --git a/pfinet/linux/if_arp.h b/pfinet/linux/if_arp.h new file mode 100644 index 00000000..75f86b61 --- /dev/null +++ b/pfinet/linux/if_arp.h @@ -0,0 +1,91 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the ARP (RFC 826) protocol. + * + * Version: @(#)if_arp.h 1.0.1 04/16/93 + * + * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1986-1988 + * Portions taken from the KA9Q/NOS (v2.00m PA0GRI) source. + * Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Florian La Roche. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_IF_ARP_H +#define _LINUX_IF_ARP_H + +/* ARP protocol HARDWARE identifiers. */ +#define ARPHRD_NETROM 0 /* from KA9Q: NET/ROM pseudo */ +#define ARPHRD_ETHER 1 /* Ethernet 10Mbps */ +#define ARPHRD_EETHER 2 /* Experimental Ethernet */ +#define ARPHRD_AX25 3 /* AX.25 Level 2 */ +#define ARPHRD_PRONET 4 /* PROnet token ring */ +#define ARPHRD_CHAOS 5 /* Chaosnet */ +#define ARPHRD_IEEE802 6 /* IEEE 802.2 Ethernet- huh? 
*/ +#define ARPHRD_ARCNET 7 /* ARCnet */ +#define ARPHRD_APPLETLK 8 /* APPLEtalk */ +/* Dummy types for non ARP hardware */ +#define ARPHRD_SLIP 256 +#define ARPHRD_CSLIP 257 +#define ARPHRD_SLIP6 258 +#define ARPHRD_CSLIP6 259 +#define ARPHRD_RSRVD 260 /* Notional KISS type */ +#define ARPHRD_ADAPT 264 +#define ARPHRD_PPP 512 +#define ARPHRD_TUNNEL 768 /* IPIP tunnel */ + +/* ARP protocol opcodes. */ +#define ARPOP_REQUEST 1 /* ARP request */ +#define ARPOP_REPLY 2 /* ARP reply */ +#define ARPOP_RREQUEST 3 /* RARP request */ +#define ARPOP_RREPLY 4 /* RARP reply */ + + +/* ARP ioctl request. */ +struct arpreq { + struct sockaddr arp_pa; /* protocol address */ + struct sockaddr arp_ha; /* hardware address */ + int arp_flags; /* flags */ + struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ +}; + +/* ARP Flag values. */ +#define ATF_COM 0x02 /* completed entry (ha valid) */ +#define ATF_PERM 0x04 /* permanent entry */ +#define ATF_PUBL 0x08 /* publish entry */ +#define ATF_USETRAILERS 0x10 /* has requested trailers */ +#define ATF_NETMASK 0x20 /* want to use a netmask (only + for proxy entries) */ + +/* + * This structure defines an ethernet arp header. + */ + +struct arphdr +{ + unsigned short ar_hrd; /* format of hardware address */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ + +#if 0 + /* + * Ethernet looks like this : This bit is variable sized however... 
+ */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +#endif + +}; + +#endif /* _LINUX_IF_ARP_H */ diff --git a/pfinet/linux/if_ether.h b/pfinet/linux/if_ether.h new file mode 100644 index 00000000..b87b1785 --- /dev/null +++ b/pfinet/linux/if_ether.h @@ -0,0 +1,80 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the Ethernet IEEE 802.3 interface. + * + * Version: @(#)if_ether.h 1.0.1a 02/08/94 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Donald Becker, <becker@super.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_IF_ETHER_H +#define _LINUX_IF_ETHER_H + + +/* IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble + and FCS/CRC (frame check sequence). */ +#define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_HLEN 14 /* Total octets in header. */ +#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ +#define ETH_DATA_LEN 1500 /* Max. octets in payload */ +#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ + + +/* These are the defined Ethernet Protocol ID's. 
*/ +#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ +#define ETH_P_ECHO 0x0200 /* Ethernet Echo packet */ +#define ETH_P_PUP 0x0400 /* Xerox PUP packet */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ +#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ +#define ETH_P_X25 0x0805 /* CCITT X.25 */ +#define ETH_P_ATALK 0x809B /* Appletalk DDP */ +#define ETH_P_IPX 0x8137 /* IPX over DIX */ +#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */ +#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */ +#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */ +#define ETH_P_802_2 0x0004 /* 802.2 frames */ +#define ETH_P_SNAP 0x0005 /* Internal only */ +/* This is an Ethernet frame header. */ +struct ethhdr { + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; + +/* Ethernet statistics collection data. 
*/ +struct enet_statistics{ + int rx_packets; /* total packets received */ + int tx_packets; /* total packets transmitted */ + int rx_errors; /* bad packets received */ + int tx_errors; /* packet transmit problems */ + int rx_dropped; /* no space in linux buffers */ + int tx_dropped; /* no space available in linux */ + int multicast; /* multicast packets received */ + int collisions; + + /* detailed rx_errors: */ + int rx_length_errors; + int rx_over_errors; /* receiver ring buff overflow */ + int rx_crc_errors; /* recved pkt with crc error */ + int rx_frame_errors; /* recv'd frame alignment error */ + int rx_fifo_errors; /* recv'r fifo overrun */ + int rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + int tx_aborted_errors; + int tx_carrier_errors; + int tx_fifo_errors; + int tx_heartbeat_errors; + int tx_window_errors; +}; + +#endif /* _LINUX_IF_ETHER_H */ diff --git a/pfinet/linux/igmp.h b/pfinet/linux/igmp.h new file mode 100644 index 00000000..6ca7e019 --- /dev/null +++ b/pfinet/linux/igmp.h @@ -0,0 +1,67 @@ +/* + * Linux NET3: Internet Group Management Protocol [IGMP] + * + * Authors: + * Alan Cox <Alan.Cox@linux.org> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_IGMP_H +#define _LINUX_IGMP_H + +/* + * IGMP protocol structures + */ + +struct igmphdr +{ + unsigned char type; + unsigned char unused; + unsigned short csum; + unsigned long group; +}; + +#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */ +#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Ditto */ +#define IGMP_HOST_LEAVE_MESSAGE 0x17 /* An extra BSD seems to send */ + + /* 224.0.0.1 */ +#define IGMP_ALL_HOSTS htonl(0xE0000001L) + +/* + * struct for keeping the multicast list in + */ + +#ifdef __KERNEL__ +struct ip_mc_socklist +{ + unsigned long multiaddr[IP_MAX_MEMBERSHIPS]; /* This is a speed trade off */ + struct device *multidev[IP_MAX_MEMBERSHIPS]; +}; + +struct ip_mc_list +{ + struct device *interface; + unsigned long multiaddr; + struct ip_mc_list *next; + struct timer_list timer; + int tm_running; + int users; +}; + +extern struct ip_mc_list *ip_mc_head; + + +extern int igmp_rcv(struct sk_buff *, struct device *, struct options *, unsigned long, unsigned short, + unsigned long, int , struct inet_protocol *); +extern void ip_mc_drop_device(struct device *dev); +extern int ip_mc_join_group(struct sock *sk, struct device *dev, unsigned long addr); +extern int ip_mc_leave_group(struct sock *sk, struct device *dev,unsigned long addr); +extern void ip_mc_drop_socket(struct sock *sk); +#endif +#endif diff --git a/pfinet/linux/in.h b/pfinet/linux/in.h new file mode 100644 index 00000000..260020a6 --- /dev/null +++ b/pfinet/linux/in.h @@ -0,0 +1 @@ +#include <netinet/in.h> diff --git a/pfinet/linux/inet.h b/pfinet/linux/inet.h new file mode 100644 index 00000000..27aa5521 --- /dev/null +++ b/pfinet/linux/inet.h @@ -0,0 +1,6 @@ +#ifndef _HACK_INET_H_ +#define _HACK_INET_H_ + +char *in_ntoa (u_long); + +#endif diff --git a/pfinet/linux/interrupt.h b/pfinet/linux/interrupt.h new file mode 100644 index 00000000..01f11a8c --- /dev/null +++ b/pfinet/linux/interrupt.h @@ -0,0 +1,11 @@ +#ifndef _HACK_INTERRUPT_H_ +#define _HACK_INTERRUPT_H_ + 
+#include <linux/netdevice.h> +#include "pfinet.h" + +#define NET_BH 1 + +extern void mark_bh (int); + +#endif diff --git a/pfinet/linux/ip.h b/pfinet/linux/ip.h new file mode 100644 index 00000000..bcc1bdea --- /dev/null +++ b/pfinet/linux/ip.h @@ -0,0 +1,121 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the IP protocol. + * + * Version: @(#)ip.h 1.0.2 04/28/93 + * + * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_IP_H +#define _LINUX_IP_H + + +#define IPOPT_END 0 +#define IPOPT_NOOP 1 +#define IPOPT_SEC 130 +#define IPOPT_LSRR 131 +#define IPOPT_SSRR 137 +#define IPOPT_RR 7 +#define IPOPT_SID 136 +#define IPOPT_TIMESTAMP 68 + + +#define MAXTTL 255 + +struct timestamp { + __u8 len; + __u8 ptr; + union { +#if defined(__i386__) + __u8 flags:4, + overflow:4; +#elif defined(__mc68000__) + __u8 overflow:4, + flags:4; +#elif defined(__MIPSEL__) + __u8 flags:4, + overflow:4; +#elif defined(__MIPSEB__) + __u8 overflow:4, + flags:4; +#elif defined(__alpha__) + __u8 flags:4, + overflow:4; +#elif defined(__sparc__) + __u8 overflow:4, + flags:4; +#else +#error "Adjust this structure to match your CPU" +#endif + __u8 full_char; + } x; + __u32 data[9]; +}; + + +#define MAX_ROUTE 16 + +struct route { + char route_size; + char pointer; + unsigned long route[MAX_ROUTE]; +}; + + +struct options { + struct route record_route; + struct route loose_route; + struct route strict_route; + struct timestamp tstamp; + unsigned short security; + unsigned short compartment; + unsigned short handling; + unsigned short stream; 
+ unsigned tcc; +}; + + +struct iphdr { +#if defined(__i386__) + __u8 ihl:4, + version:4; +#elif defined (__mc68000__) + __u8 version:4, + ihl:4; +#elif defined(__MIPSEL__) + __u8 ihl:4, + version:4; +#elif defined(__MIPSEB__) + __u8 version:4, + ihl:4; +#elif defined(__alpha__) + __u8 ihl:4, + version:4; +#elif defined (__sparc__) + __u8 version:4, + ihl:4; +#else +#error "Adjust this structure to match your CPU" +#endif + __u8 tos; + __u16 tot_len; + __u16 id; + __u16 frag_off; + __u8 ttl; + __u8 protocol; + __u16 check; + __u32 saddr; + __u32 daddr; + /*The options start here. */ +}; + + +#endif /* _LINUX_IP_H */ diff --git a/pfinet/linux/ip_fw.h b/pfinet/linux/ip_fw.h new file mode 100644 index 00000000..f80cccbf --- /dev/null +++ b/pfinet/linux/ip_fw.h @@ -0,0 +1,147 @@ +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan. + * + * All the real work was done by ..... + */ + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. 
+ * + * This software is provided ``AS IS'' without any warranties of any kind. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags and num_*_ports are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +#ifndef _IP_FW_H +#define _IP_FW_H + +struct ip_fw +{ + struct ip_fw *fw_next; /* Next firewall on chain */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + struct in_addr fw_via; /* IP address of interface "via" */ + unsigned short fw_flg; /* Flags word */ + unsigned short fw_nsp, fw_ndp; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ +#define IP_FW_MAX_PORTS 10 /* A reasonable maximum */ + unsigned short fw_pts[IP_FW_MAX_PORTS]; /* Array of port numbers to match */ + unsigned long fw_pcnt,fw_bcnt; /* Packet and byte counters */ +}; + +/* + * Values for "flags" field . + */ + +#define IP_FW_F_ALL 0x000 /* This is a universal packet firewall*/ +#define IP_FW_F_TCP 0x001 /* This is a TCP packet firewall */ +#define IP_FW_F_UDP 0x002 /* This is a UDP packet firewall */ +#define IP_FW_F_ICMP 0x003 /* This is a ICMP packet firewall */ +#define IP_FW_F_KIND 0x003 /* Mask to isolate firewall kind */ +#define IP_FW_F_ACCEPT 0x004 /* This is an accept firewall (as * + * opposed to a deny firewall)* + * */ +#define IP_FW_F_SRNG 0x008 /* The first two src ports are a min * + * and max range (stored in host byte * + * order). * + * */ +#define IP_FW_F_DRNG 0x010 /* The first two dst ports are a min * + * and max range (stored in host byte * + * order). 
* + * (ports[0] <= port <= ports[1]) * + * */ +#define IP_FW_F_PRN 0x020 /* In verbose mode print this firewall*/ +#define IP_FW_F_BIDIR 0x040 /* For bidirectional firewalls */ +#define IP_FW_F_TCPSYN 0x080 /* For tcp packets-check SYN only */ +#define IP_FW_F_ICMPRPL 0x100 /* Send back icmp unreachable packet */ +#define IP_FW_F_MASK 0x1FF /* All possible flag bits mask */ + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. + */ + +#define IP_FW_BASE_CTL 64 + +#define IP_FW_ADD_BLK (IP_FW_BASE_CTL) +#define IP_FW_ADD_FWD (IP_FW_BASE_CTL+1) +#define IP_FW_CHK_BLK (IP_FW_BASE_CTL+2) +#define IP_FW_CHK_FWD (IP_FW_BASE_CTL+3) +#define IP_FW_DEL_BLK (IP_FW_BASE_CTL+4) +#define IP_FW_DEL_FWD (IP_FW_BASE_CTL+5) +#define IP_FW_FLUSH_BLK (IP_FW_BASE_CTL+6) +#define IP_FW_FLUSH_FWD (IP_FW_BASE_CTL+7) +#define IP_FW_ZERO_BLK (IP_FW_BASE_CTL+8) +#define IP_FW_ZERO_FWD (IP_FW_BASE_CTL+9) +#define IP_FW_POLICY_BLK (IP_FW_BASE_CTL+10) +#define IP_FW_POLICY_FWD (IP_FW_BASE_CTL+11) + +#define IP_ACCT_ADD (IP_FW_BASE_CTL+16) +#define IP_ACCT_DEL (IP_FW_BASE_CTL+17) +#define IP_ACCT_FLUSH (IP_FW_BASE_CTL+18) +#define IP_ACCT_ZERO (IP_FW_BASE_CTL+19) + +struct ip_fwpkt +{ + struct iphdr fwp_iph; /* IP header */ + union { + struct tcphdr fwp_tcph; /* TCP header or */ + struct udphdr fwp_udph; /* UDP header */ + } fwp_protoh; + struct in_addr fwp_via; /* interface address */ +}; + +/* + * Main firewall chains definitions and global var's definitions. 
+ */ + +#ifdef __KERNEL__ + +#include <linux/config.h> + +#ifdef CONFIG_IP_FIREWALL +extern struct ip_fw *ip_fw_blk_chain; +extern struct ip_fw *ip_fw_fwd_chain; +extern int ip_fw_blk_policy; +extern int ip_fw_fwd_policy; +extern int ip_fw_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_ACCT +extern struct ip_fw *ip_acct_chain; +extern void ip_acct_cnt(struct iphdr *, struct device *, struct ip_fw *); +extern int ip_acct_ctl(int, void *, int); +#endif +extern int ip_fw_chk(struct iphdr *, struct device *rif,struct ip_fw *, int, int); +#endif /* KERNEL */ + +#endif /* _IP_FW_H */ diff --git a/pfinet/linux/ipx.h b/pfinet/linux/ipx.h new file mode 100644 index 00000000..d3bff83b --- /dev/null +++ b/pfinet/linux/ipx.h @@ -0,0 +1,78 @@ +#ifndef _IPX_H_ +#define _IPX_H_ +#include <linux/sockios.h> +#define IPX_NODE_LEN 6 +#define IPX_MTU 576 + +struct sockaddr_ipx +{ + short sipx_family; + short sipx_port; + unsigned long sipx_network; + unsigned char sipx_node[IPX_NODE_LEN]; + unsigned char sipx_type; + unsigned char sipx_zero; /* 16 byte fill */ +}; + +/* + * So we can fit the extra info for SIOCSIFADDR into the address nicely + */ + +#define sipx_special sipx_port +#define sipx_action sipx_zero +#define IPX_DLTITF 0 +#define IPX_CRTITF 1 + +typedef struct ipx_route_definition +{ + unsigned long ipx_network; + unsigned long ipx_router_network; + unsigned char ipx_router_node[IPX_NODE_LEN]; +} ipx_route_definition; + +typedef struct ipx_interface_definition +{ + unsigned long ipx_network; + unsigned char ipx_device[16]; + unsigned char ipx_dlink_type; +#define IPX_FRAME_NONE 0 +#define IPX_FRAME_SNAP 1 +#define IPX_FRAME_8022 2 +#define IPX_FRAME_ETHERII 3 +#define IPX_FRAME_8023 4 + unsigned char ipx_special; +#define IPX_SPECIAL_NONE 0 +#define IPX_PRIMARY 1 +#define IPX_INTERNAL 2 + unsigned char ipx_node[IPX_NODE_LEN]; +} ipx_interface_definition; + +typedef struct ipx_config_data +{ + unsigned char ipxcfg_auto_select_primary; + unsigned char 
ipxcfg_auto_create_interfaces; +} ipx_config_data; + +/* + * OLD Route Definition for backward compatibility. + */ + +struct ipx_route_def +{ + unsigned long ipx_network; + unsigned long ipx_router_network; +#define IPX_ROUTE_NO_ROUTER 0 + unsigned char ipx_router_node[IPX_NODE_LEN]; + unsigned char ipx_device[16]; + unsigned short ipx_flags; +#define IPX_RT_SNAP 8 +#define IPX_RT_8022 4 +#define IPX_RT_BLUEBOOK 2 +#define IPX_RT_ROUTED 1 +}; + +#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) +#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE+1) +#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE+2) +#endif + diff --git a/pfinet/linux/kernel.h b/pfinet/linux/kernel.h new file mode 100644 index 00000000..dcd5acf3 --- /dev/null +++ b/pfinet/linux/kernel.h @@ -0,0 +1,44 @@ +#ifndef _HACK_KERNEL_H +#define _HACK_KERNEL_H + +#include <stdio.h> +#include <linux/sched.h> +#include <stdlib.h> +#include <assert.h> + +#define printk printf + +extern inline int +getname (const char *name, char **newp) +{ + *newp = malloc (strlen (name) + 1); + strcpy (*newp, name); + return 0; +} + +extern inline void +putname (char *p) +{ + free (p); +} + +/* These two functions are used only to send SIGURG. But I can't + find any SIGIO code at all. So we'll just punt on that; clearly + Linux is missing the point. SIGURG should only be sent for + sockets that have explicitly requested it. 
*/ +extern inline int +kill_proc (int pid, int signo, int priv) +{ + assert (signo == SIGURG); + return 0; +} + +extern inline int +kill_pg (int pgrp, int signo, int priv) +{ + assert (signo == SIGURG); + return 0; +} + + +#endif diff --git a/pfinet/linux/major.h b/pfinet/linux/major.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/pfinet/linux/major.h diff --git a/pfinet/linux/malloc.h b/pfinet/linux/malloc.h new file mode 100644 index 00000000..06930659 --- /dev/null +++ b/pfinet/linux/malloc.h @@ -0,0 +1,10 @@ +#ifndef _HACK_MALLOC_H_ +#define _HACK_MALLOC_H_ + +#include <linux/mm.h> + +#define kfree_s(a,b) (free (a)) +#define kfree(a) (free (a)) +#define kmalloc(a,b) (malloc (a)) + +#endif diff --git a/pfinet/linux/mm.h b/pfinet/linux/mm.h new file mode 100644 index 00000000..0fb18a59 --- /dev/null +++ b/pfinet/linux/mm.h @@ -0,0 +1,16 @@ +#ifndef _HACK_MM_H_ +#define _HACK_MM_H_ + +#include <linux/kernel.h> +#include <linux/sched.h> + +/* All memory addresses are presumptively valid, because they are + all internal. */ +#define verify_area(a,b,c) 0 + +#define VERIFY_READ 0 +#define VERIFY_WRITE 0 +#define GFP_ATOMIC 0 +#define GFP_KERNEL 0 + +#endif diff --git a/pfinet/linux/net.h b/pfinet/linux/net.h new file mode 100644 index 00000000..341d0253 --- /dev/null +++ b/pfinet/linux/net.h @@ -0,0 +1,146 @@ +/* + * NET An implementation of the SOCKET network access protocol. + * This is the master header file for the Linux NET layer, + * or, in plain English: the networking handling part of the + * kernel. + * + * Version: @(#)net.h 1.0.3 05/25/93 + * + * Authors: Orest Zborowski, <obz@Kodak.COM> + * Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _LINUX_NET_H +#define _LINUX_NET_H + + +#include <linux/wait.h> +#include <linux/socket.h> + + +#define NSOCKETS 2000 /* Dynamic, this is MAX LIMIT */ +#define NSOCKETS_UNIX 128 /* unix domain static limit */ +#define NPROTO 16 /* should be enough for now.. */ + + +#define SYS_SOCKET 1 /* sys_socket(2) */ +#define SYS_BIND 2 /* sys_bind(2) */ +#define SYS_CONNECT 3 /* sys_connect(2) */ +#define SYS_LISTEN 4 /* sys_listen(2) */ +#define SYS_ACCEPT 5 /* sys_accept(2) */ +#define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */ +#define SYS_GETPEERNAME 7 /* sys_getpeername(2) */ +#define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */ +#define SYS_SEND 9 /* sys_send(2) */ +#define SYS_RECV 10 /* sys_recv(2) */ +#define SYS_SENDTO 11 /* sys_sendto(2) */ +#define SYS_RECVFROM 12 /* sys_recvfrom(2) */ +#define SYS_SHUTDOWN 13 /* sys_shutdown(2) */ +#define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */ +#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ + + +typedef enum { + SS_FREE = 0, /* not allocated */ + SS_UNCONNECTED, /* unconnected to any socket */ + SS_CONNECTING, /* in process of connecting */ + SS_CONNECTED, /* connected to socket */ + SS_DISCONNECTING /* in process of disconnecting */ +} socket_state; + +#define SO_ACCEPTCON (1<<16) /* performed a listen */ +#define SO_WAITDATA (1<<17) /* wait data to read */ +#define SO_NOSPACE (1<<18) /* no space to write */ + +#ifdef __KERNEL__ +/* + * Internal representation of a socket. not all the fields are used by + * all configurations: + * + * server client + * conn client connected to server connected to + * iconn list of clients -unused- + * awaiting connections + * wait sleep for clients, sleep for connection, + * sleep for i/o sleep for i/o + */ +struct socket { + short type; /* SOCK_STREAM, ... 
*/ + socket_state state; + long flags; +#ifdef _HURD_ + int userflags; /* O_* */ + int refcnt; + mach_port_t identity; +#endif + struct proto_ops *ops; /* protocols do most everything */ + void *data; /* protocol data */ + struct socket *conn; /* server socket connected to */ + struct socket *iconn; /* incomplete client conn.s */ + struct socket *next; + struct wait_queue **wait; /* ptr to place to wait on */ + struct inode *inode; + struct fasync_struct *fasync_list; /* Asynchronous wake up list */ +}; + +#define SOCK_INODE(S) ((S)->inode) + +struct proto_ops { + int family; + + int (*create) (struct socket *sock, int protocol); + int (*dup) (struct socket *newsock, struct socket *oldsock); + int (*release) (struct socket *sock, struct socket *peer); + int (*bind) (struct socket *sock, struct sockaddr *umyaddr, + int sockaddr_len); + int (*connect) (struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags); + int (*socketpair) (struct socket *sock1, struct socket *sock2); + int (*accept) (struct socket *sock, struct socket *newsock, + int flags); + int (*getname) (struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer); + int (*read) (struct socket *sock, char *ubuf, int size, + int nonblock); + int (*write) (struct socket *sock, char *ubuf, int size, + int nonblock); + int (*select) (struct socket *sock, int sel_type, + select_table *wait); + int (*ioctl) (struct socket *sock, unsigned int cmd, + unsigned long arg); + int (*listen) (struct socket *sock, int len); + int (*send) (struct socket *sock, void *buff, int len, int nonblock, + unsigned flags); + int (*recv) (struct socket *sock, void *buff, int len, int nonblock, + unsigned flags); + int (*sendto) (struct socket *sock, void *buff, int len, int nonblock, + unsigned flags, struct sockaddr *, int addr_len); + int (*recvfrom) (struct socket *sock, void *buff, int len, int nonblock, + unsigned flags, struct sockaddr *, int *addr_len); + int (*shutdown) (struct socket 
*sock, int flags); + int (*setsockopt) (struct socket *sock, int level, int optname, + char *optval, int optlen); + int (*getsockopt) (struct socket *sock, int level, int optname, + char *optval, int *optlen); + int (*fcntl) (struct socket *sock, unsigned int cmd, + unsigned long arg); +}; + +struct net_proto { + char *name; /* Protocol name */ + void (*init_func)(struct net_proto *); /* Bootstrap */ +}; + +extern int sock_awaitconn(struct socket *mysock, struct socket *servsock, int flags); +extern int sock_wake_async(struct socket *sock, int how); +extern int sock_register(int family, struct proto_ops *ops); +extern int sock_unregister(int family); +extern struct socket *sock_alloc(void); +extern void sock_release(struct socket *sock); +#endif /* __KERNEL__ */ +#endif /* _LINUX_NET_H */ diff --git a/pfinet/linux/netdevice.h b/pfinet/linux/netdevice.h new file mode 100644 index 00000000..dcca542d --- /dev/null +++ b/pfinet/linux/netdevice.h @@ -0,0 +1,235 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the Interfaces handler. + * + * Version: @(#)dev.h 1.0.10 08/12/93 + * + * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * Corey Minyard <wf-rch!minyard@relay.EU.net> + * Donald J. Becker, <becker@super.org> + * Alan Cox, <A.Cox@swansea.ac.uk> + * Bjorn Ekwall. <bj0rn@blox.se> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Moved to /usr/include/linux for NET3 + */ +#ifndef _LINUX_NETDEVICE_H +#define _LINUX_NETDEVICE_H + +#include <linux/if.h> +#include <linux/if_ether.h> +#include <linux/skbuff.h> + +/* for future expansion when we will have different priorities. */ +#define DEV_NUMBUFFS 3 +#define MAX_ADDR_LEN 7 +#define MAX_HEADER 18 + +#define IS_MYADDR 1 /* address is (one of) our own */ +#define IS_LOOPBACK 2 /* address is for LOOPBACK */ +#define IS_BROADCAST 3 /* address is a valid broadcast */ +#define IS_INVBCAST 4 /* Wrong netmask bcast not for us (unused)*/ +#define IS_MULTICAST 5 /* Multicast IP address */ + +/* + * We tag these structures with multicasts. + */ + +struct dev_mc_list +{ + struct dev_mc_list *next; + char dmi_addr[MAX_ADDR_LEN]; + unsigned short dmi_addrlen; + unsigned short dmi_users; +}; + +/* + * The DEVICE structure. + * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. + */ +struct device +{ + + /* + * This is the first field of the "visible" part of this structure + * (i.e. as seen by users in the "Space.c" file). It is the name + * of the interface. + */ + char *name; + + /* I/O specific fields - FIXME: Merge these and struct ifmap into one */ + unsigned long rmem_end; /* shmem "recv" end */ + unsigned long rmem_start; /* shmem "recv" start */ + unsigned long mem_end; /* shared mem end */ + unsigned long mem_start; /* shared mem start */ + unsigned long base_addr; /* device I/O address */ + unsigned char irq; /* device IRQ number */ + + /* Low-level status flags. */ + volatile unsigned char start, /* start an operation */ + tbusy, /* transmitter busy */ + interrupt; /* interrupt arrived */ + + struct device *next; + + /* The device initialization function. Called only once. 
*/ + int (*init)(struct device *dev); + + /* Some hardware also needs these fields, but they are not part of the + usual set specified in Space.c. */ + unsigned char if_port; /* Selectable AUI, TP,..*/ + unsigned char dma; /* DMA channel */ + + struct enet_statistics* (*get_stats)(struct device *dev); + + /* + * This marks the end of the "visible" part of the structure. All + * fields hereafter are internal to the system, and may change at + * will (read: may be cleaned up at will). + */ + + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + unsigned long last_rx; /* Time of last Rx */ + + unsigned short flags; /* interface flags (a la BSD) */ + unsigned short family; /* address family ID (AF_INET) */ + unsigned short metric; /* routing metric (not used) */ + unsigned short mtu; /* interface MTU value */ + unsigned short type; /* interface hardware type */ + unsigned short hard_header_len; /* hardware hdr length */ + void *priv; /* pointer to private data */ + + /* Interface address info. */ + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ + unsigned char addr_len; /* hardware address length */ + unsigned long pa_addr; /* protocol address */ + unsigned long pa_brdaddr; /* protocol broadcast addr */ + unsigned long pa_dstaddr; /* protocol P-P other side addr */ + unsigned long pa_mask; /* protocol netmask */ + unsigned short pa_alen; /* protocol address length */ + + struct dev_mc_list *mc_list; /* Multicast mac addresses */ + int mc_count; /* Number of installed mcasts */ + + struct ip_mc_list *ip_mc_list; /* IP multicast filter chain */ + + /* For load balancing driver pair support */ + + unsigned long pkt_queue; /* Packets queued */ + struct device *slave; /* Slave device */ + + + /* Pointer to the interface buffers. */ + struct sk_buff_head buffs[DEV_NUMBUFFS]; + + /* Pointers to interface service routines. 
*/ + int (*open)(struct device *dev); + int (*stop)(struct device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct device *dev); + int (*hard_header) (unsigned char *buff, + struct device *dev, + unsigned short type, + void *daddr, + void *saddr, + unsigned len, + struct sk_buff *skb); + int (*rebuild_header)(void *eth, struct device *dev, + unsigned long raddr, struct sk_buff *skb); + unsigned short (*type_trans) (struct sk_buff *skb, + struct device *dev); +#define HAVE_MULTICAST + void (*set_multicast_list)(struct device *dev, + int num_addrs, void *addrs); +#define HAVE_SET_MAC_ADDR + int (*set_mac_address)(struct device *dev, void *addr); +#define HAVE_PRIVATE_IOCTL + int (*do_ioctl)(struct device *dev, struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*set_config)(struct device *dev, struct ifmap *map); + +}; + + +struct packet_type { + unsigned short type; /* This is really htons(ether_type). */ + struct device * dev; + int (*func) (struct sk_buff *, struct device *, + struct packet_type *); + void *data; + struct packet_type *next; +}; + + +#ifdef __KERNEL__ + +#include <linux/notifier.h> + +/* Used by dev_rint */ +#define IN_SKBUFF 1 + +extern volatile char in_bh; + +extern struct device loopback_dev; +extern struct device *dev_base; +extern struct packet_type *ptype_base; + + +extern int ip_addr_match(unsigned long addr1, unsigned long addr2); +extern int ip_chk_addr(unsigned long addr); +extern struct device *ip_dev_check(unsigned long daddr); +extern unsigned long ip_my_addr(void); +extern unsigned long ip_get_mask(unsigned long addr); + +extern void dev_add_pack(struct packet_type *pt); +extern void dev_remove_pack(struct packet_type *pt); +extern struct device *dev_get(char *name); +extern int dev_open(struct device *dev); +extern int dev_close(struct device *dev); +extern void dev_queue_xmit(struct sk_buff *skb, struct device *dev, + int pri); +#define HAVE_NETIF_RX 1 +extern void netif_rx(struct sk_buff *skb); +/* The old 
interface to netif_rx(). */ +extern int dev_rint(unsigned char *buff, long len, int flags, + struct device * dev); +extern void dev_transmit(void); +extern int in_net_bh(void); +extern void net_bh(void *tmp); +extern void dev_tint(struct device *dev); +extern int dev_get_info(char *buffer, char **start, off_t offset, int length); +extern int dev_ioctl(unsigned int cmd, void *); + +extern void dev_init(void); + +/* These functions live elsewhere (drivers/net/net_init.c, but related) */ + +extern void ether_setup(struct device *dev); +extern int ether_config(struct device *dev, struct ifmap *map); +/* Support for loadable net-drivers */ +extern int register_netdev(struct device *dev); +extern void unregister_netdev(struct device *dev); +extern int register_netdevice_notifier(struct notifier_block *nb); +extern int unregister_netdevice_notifier(struct notifier_block *nb); +/* Functions used for multicast support */ +extern void dev_mc_upload(struct device *dev); +extern void dev_mc_delete(struct device *dev, void *addr, int alen, int all); +extern void dev_mc_add(struct device *dev, void *addr, int alen, int newonly); +extern void dev_mc_discard(struct device *dev); +/* This is the wrong place but it'll do for the moment */ +extern void ip_mc_allhost(struct device *dev); +#endif /* __KERNEL__ */ + +#endif /* _LINUX_DEV_H */ diff --git a/pfinet/linux/notifier.h b/pfinet/linux/notifier.h new file mode 100644 index 00000000..78a44649 --- /dev/null +++ b/pfinet/linux/notifier.h @@ -0,0 +1,96 @@ +/* + * Routines to manage notifier chains for passing status changes to any + * interested routines. We need this instead of hard coded call lists so + * that modules can poke their nose into the innards. The network devices + * needed them so here they are for the rest of you. 
+ * + * Alan Cox <Alan.Cox@linux.org> + */ + +#ifndef _LINUX_NOTIFIER_H +#define _LINUX_NOTIFIER_H +#include <linux/errno.h> + +struct notifier_block +{ + int (*notifier_call)(unsigned long, void *); + struct notifier_block *next; + int priority; +}; + + +#ifdef __KERNEL__ + +#define NOTIFY_DONE 0x0000 /* Don't care */ +#define NOTIFY_OK 0x0001 /* Suits me */ +#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ +#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ + +extern __inline__ int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) +{ + while(*list) + { + if(n->priority > (*list)->priority) + break; + list= &((*list)->next); + } + n->next = *list; + *list=n; + return 0; +} + +/* + * Warning to any non GPL module writers out there.. these functions are + * GPL'd + */ + +extern __inline__ int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) +{ + while((*nl)!=NULL) + { + if((*nl)==n) + { + *nl=n->next; + return 0; + } + nl=&((*nl)->next); + } + return -ENOENT; +} + +/* + * This is one of these things that is generally shorter inline + */ + +extern __inline__ int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) +{ + int ret=NOTIFY_DONE; + struct notifier_block *nb = *n; + while(nb) + { + ret=nb->notifier_call(val,v); + if(ret&NOTIFY_STOP_MASK) + return ret; + nb=nb->next; + } + return ret; +} + + +/* + * Declared notifiers so far. I can imagine quite a few more chains + * over time (eg laptop power reset chains, reboot chain (to clean + * device units up), device [un]mount chain, module load/unload chain, + * low memory chain, screenblank chain (for plug in modular screenblankers) + * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... 
+ */ + +/* netdevice notifier chain */ +#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ +#define NETDEV_DOWN 0x0002 +#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface + detected a hardware crash and restarted + - we can use this eg to kick tcp sessions + once done */ +#endif +#endif diff --git a/pfinet/linux/param.h b/pfinet/linux/param.h new file mode 100644 index 00000000..39efaf0d --- /dev/null +++ b/pfinet/linux/param.h @@ -0,0 +1 @@ +#include <sys/param.h> diff --git a/pfinet/linux/route.h b/pfinet/linux/route.h new file mode 100644 index 00000000..3cadd206 --- /dev/null +++ b/pfinet/linux/route.h @@ -0,0 +1,69 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the IP router interface. + * + * Version: @(#)route.h 1.0.3 05/27/93 + * + * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1986-1988 + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_ROUTE_H +#define _LINUX_ROUTE_H + +#include <linux/if.h> + + +/* This structure gets passed by the SIOCADDRTOLD and SIOCDELRTOLD calls. */ + +struct old_rtentry { + unsigned long rt_genmask; + struct sockaddr rt_dst; + struct sockaddr rt_gateway; + short rt_flags; + short rt_refcnt; + unsigned long rt_use; + char *rt_dev; +}; + +/* This structure gets passed by the SIOCADDRT and SIOCDELRT calls. 
*/ +struct rtentry { + unsigned long rt_hash; /* hash key for lookups */ + struct sockaddr rt_dst; /* target address */ + struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ + struct sockaddr rt_genmask; /* target network mask (IP) */ + short rt_flags; + short rt_refcnt; + unsigned long rt_use; + struct ifnet *rt_ifp; + short rt_metric; /* +1 for binary compatibility! */ + char *rt_dev; /* forcing the device at add */ + unsigned long rt_mss; /* per route MTU/Window */ + unsigned long rt_window; /* Window clamping */ +}; + + +#define RTF_UP 0x0001 /* route usable */ +#define RTF_GATEWAY 0x0002 /* destination is a gateway */ +#define RTF_HOST 0x0004 /* host entry (net otherwise) */ +#define RTF_REINSTATE 0x0008 /* reinstate route after tmout */ +#define RTF_DYNAMIC 0x0010 /* created dyn. (by redirect) */ +#define RTF_MODIFIED 0x0020 /* modified dyn. (by redirect) */ +#define RTF_MSS 0x0040 /* specific MSS for this route */ +#define RTF_WINDOW 0x0080 /* per route window clamping */ + +/* + * REMOVE THESE BY 1.2.0 !!!!!!!!!!!!!!!!! + */ + +#define RTF_MTU RTF_MSS +#define rt_mtu rt_mss + +#endif /* _LINUX_ROUTE_H */ diff --git a/pfinet/linux/sched.h b/pfinet/linux/sched.h new file mode 100644 index 00000000..acc60944 --- /dev/null +++ b/pfinet/linux/sched.h @@ -0,0 +1,81 @@ +#ifndef _HACK_SCHED_H +#define _HACK_SCHED_H + +#include <linux/wait.h> +#include <sys/signal.h> +#include <hurd/hurd_types.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <sys/time.h> +#include "mapped-time.h" +#include <assert.h> +#include <mach.h> +#include <asm/system.h> + +#define jiffies (fetch_jiffies ()) +extern struct task_struct *current; +extern struct task_struct current_contents; + +struct task_struct +{ + uid_t pgrp, pid; + int flags; + int timeout; + int signal; + int blocked; + int state; + int isroot; +}; + +/* FLAGS in task_struct's. */ +#define PF_EXITING 1 +/* STATE in task_struct's. 
*/ +#define TASK_INTERRUPTIBLE 1 +#define TASK_RUNNING 2 + +extern inline int +suser () +{ + return current->isroot; +}; + +void wake_up_interruptible (struct wait_queue **); +void interruptible_sleep_on (struct wait_queue **); + +void select_wait (struct wait_queue **, select_table *); + +void schedule (void); + +#define SEL_IN SELECT_READ +#define SEL_OUT SELECT_WRITE +#define SEL_EX SELECT_URG + +/* This function is used only to send SIGPIPE to the current + task. In all such cases, EPIPE is returned anyhow. In the + Hurd, servers are not responsible for SIGPIPE; the library + does that itself upon receiving EPIPE. So we can just + NOP such calls. */ +extern inline int +send_sig (u_long signo, struct task_struct *task, int priv) +{ + assert (signo == SIGPIPE); + assert (task == current); + return 0; +} + +int fetch_current_time (void); +struct timeval fetch_xtime (void); + +#define xtime (fetch_xtime ()) +#define CURRENT_TIME (xtime.tv_sec) + +static struct timeval _xtime_buf; + +extern inline struct timeval +fetch_xtime () +{ + maptime_read (mapped_time, &_xtime_buf); + return _xtime_buf; +} + +#endif diff --git a/pfinet/linux/skbuff.h b/pfinet/linux/skbuff.h new file mode 100644 index 00000000..817f89d7 --- /dev/null +++ b/pfinet/linux/skbuff.h @@ -0,0 +1,286 @@ +/* + * Definitions for the 'struct sk_buff' memory handlers. + * + * Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_SKBUFF_H +#define _LINUX_SKBUFF_H +#include <linux/malloc.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/config.h> + +#undef CONFIG_SKB_CHECK + +#define HAVE_ALLOC_SKB /* For the drivers to know */ + + +#define FREE_READ 1 +#define FREE_WRITE 0 + + +struct sk_buff_head { + struct sk_buff * volatile next; + struct sk_buff * volatile prev; +#if CONFIG_SKB_CHECK + int magic_debug_cookie; +#endif +}; + + +struct sk_buff { + struct sk_buff * volatile next; + struct sk_buff * volatile prev; +#if CONFIG_SKB_CHECK + int magic_debug_cookie; +#endif + struct sk_buff * volatile link3; + struct sock *sk; + volatile unsigned long when; /* used to compute rtt's */ + struct timeval stamp; + struct device *dev; + struct sk_buff *mem_addr; + union { + struct tcphdr *th; + struct ethhdr *eth; + struct iphdr *iph; + struct udphdr *uh; + unsigned char *raw; + unsigned long seq; + } h; + struct iphdr *ip_hdr; /* For IPPROTO_RAW */ + unsigned long mem_len; + unsigned long len; + unsigned long fraglen; + struct sk_buff *fraglist; /* Fragment list */ + unsigned long truesize; + unsigned long saddr; + unsigned long daddr; + unsigned long raddr; /* next hop addr */ + volatile char acked, + used, + free, + arp; + unsigned char tries,lock,localroute,pkt_type; +#define PACKET_HOST 0 /* To us */ +#define PACKET_BROADCAST 1 +#define PACKET_MULTICAST 2 +#define PACKET_OTHERHOST 3 /* Unmatched promiscuous */ + unsigned short users; /* User count - see datagram.c (and soon seqpacket.c/stream.c) */ + unsigned short pkt_class; /* For drivers that need to cache the packet type with the skbuff (new PPP) */ +#ifdef CONFIG_SLAVE_BALANCING + unsigned short in_dev_queue; +#endif + unsigned long padding[0]; + unsigned char data[0]; +}; + +#define SK_WMEM_MAX 32767 +#define SK_RMEM_MAX 32767 + +#ifdef CONFIG_SKB_CHECK +#define SK_FREED_SKB 0x0DE2C0DE +#define SK_GOOD_SKB 0xDEC0DED1 +#define SK_HEAD_SKB 0x12231298 +#endif + +#ifdef __KERNEL__ +/* + * Handling 
routines are only of interest to the kernel + */ + +#include <asm/system.h> + +#if 0 +extern void print_skb(struct sk_buff *); +#endif +extern void kfree_skb(struct sk_buff *skb, int rw); +extern void skb_queue_head_init(struct sk_buff_head *list); +extern void skb_queue_head(struct sk_buff_head *list,struct sk_buff *buf); +extern void skb_queue_tail(struct sk_buff_head *list,struct sk_buff *buf); +extern struct sk_buff * skb_dequeue(struct sk_buff_head *list); +extern void skb_insert(struct sk_buff *old,struct sk_buff *newsk); +extern void skb_append(struct sk_buff *old,struct sk_buff *newsk); +extern void skb_unlink(struct sk_buff *buf); +extern struct sk_buff * skb_peek_copy(struct sk_buff_head *list); +extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb, unsigned size); +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); +extern void skb_device_lock(struct sk_buff *skb); +extern void skb_device_unlock(struct sk_buff *skb); +extern void dev_kfree_skb(struct sk_buff *skb, int mode); +extern int skb_device_locked(struct sk_buff *skb); +/* + * Peek an sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. For an interrupt + * type system cli() peek the buffer copy the data and sti(); + */ +static __inline__ struct sk_buff *skb_peek(struct sk_buff_head *list_) +{ + struct sk_buff *list = (struct sk_buff *)list_; + return (list->next != list)? 
list->next : NULL; +} + +#if CONFIG_SKB_CHECK +extern int skb_check(struct sk_buff *skb,int,int, char *); +#define IS_SKB(skb) skb_check((skb), 0, __LINE__,__FILE__) +#define IS_SKB_HEAD(skb) skb_check((skb), 1, __LINE__,__FILE__) +#else +#define IS_SKB(skb) +#define IS_SKB_HEAD(skb) + +extern __inline__ void skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; +} + +/* + * Insert an sk_buff at the start of a list. + */ + +extern __inline__ void skb_queue_head(struct sk_buff_head *list_,struct sk_buff *newsk) +{ + unsigned long flags; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + newsk->next = list->next; + newsk->prev = list; + newsk->next->prev = newsk; + newsk->prev->next = newsk; + restore_flags(flags); +} + +/* + * Insert an sk_buff at the end of a list. + */ + +extern __inline__ void skb_queue_tail(struct sk_buff_head *list_, struct sk_buff *newsk) +{ + unsigned long flags; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + + newsk->next = list; + newsk->prev = list->prev; + + newsk->next->prev = newsk; + newsk->prev->next = newsk; + + restore_flags(flags); +} + +/* + * Remove an sk_buff from a list. This routine is also interrupt safe + * so you can grab read and free buffers as another process adds them. + */ + +extern __inline__ struct sk_buff *skb_dequeue(struct sk_buff_head *list_) +{ + long flags; + struct sk_buff *result; + struct sk_buff *list = (struct sk_buff *)list_; + + save_flags(flags); + cli(); + + result = list->next; + if (result == list) { + restore_flags(flags); + return NULL; + } + + result->next->prev = list; + list->next = result->next; + + result->next = NULL; + result->prev = NULL; + + restore_flags(flags); + + return result; +} + +/* + * Insert a packet before another one in a list. 
+ */ + +extern __inline__ void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + save_flags(flags); + cli(); + newsk->next = old; + newsk->prev = old->prev; + old->prev = newsk; + newsk->prev->next = newsk; + + restore_flags(flags); +} + +/* + * Place a packet after a given packet in a list. + */ + +extern __inline__ void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + newsk->prev = old; + newsk->next = old->next; + newsk->next->prev = newsk; + old->next = newsk; + + restore_flags(flags); +} + +/* + * Remove an sk_buff from its list. Works even without knowing the list it + * is sitting on, which can be handy at times. It also means that THE LIST + * MUST EXIST when you unlink. Thus a list must have its contents unlinked + * _FIRST_. + */ + +extern __inline__ void skb_unlink(struct sk_buff *skb) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + if(skb->prev && skb->next) + { + skb->next->prev = skb->prev; + skb->prev->next = skb->next; + skb->next = NULL; + skb->prev = NULL; + } + restore_flags(flags); +} + +#endif + +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); +extern int datagram_select(struct sock *sk, int sel_type, select_table *wait); +extern void skb_copy_datagram(struct sk_buff *from, int offset, char *to,int size); +extern void skb_free_datagram(struct sk_buff *skb); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SKBUFF_H */ diff --git a/pfinet/linux/socket.h b/pfinet/linux/socket.h new file mode 100644 index 00000000..22dd05ce --- /dev/null +++ b/pfinet/linux/socket.h @@ -0,0 +1,27 @@ +#ifndef _HACK_SOCKET_H_ +#define _HACK_SOCKET_H_ + +#include <sys/socket.h> +#include <sys/ioctl.h> + +#define IP_MAX_MEMBERSHIPS 10 + +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define IPTOS_RELIABILITY 0x04 + +#define SOPRI_INTERACTIVE 0 +#define SOPRI_NORMAL 1 +#define SOPRI_BACKGROUND 2 + 
+#define SOL_IP IPPROTO_IP +#define SOL_TCP IPPROTO_TCP + +/* TCP options */ +#define TCP_NODELAY 1 +#define TCP_MAXSEG 2 + +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 + +#endif diff --git a/pfinet/linux/sockios.h b/pfinet/linux/sockios.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/pfinet/linux/sockios.h diff --git a/pfinet/linux/stat.h b/pfinet/linux/stat.h new file mode 100644 index 00000000..5165069b --- /dev/null +++ b/pfinet/linux/stat.h @@ -0,0 +1 @@ +#include <sys/stat.h> diff --git a/pfinet/linux/string.h b/pfinet/linux/string.h new file mode 100644 index 00000000..3b2f5900 --- /dev/null +++ b/pfinet/linux/string.h @@ -0,0 +1 @@ +#include <string.h> diff --git a/pfinet/linux/tcp.h b/pfinet/linux/tcp.h new file mode 100644 index 00000000..32ef0ad1 --- /dev/null +++ b/pfinet/linux/tcp.h @@ -0,0 +1,112 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+#ifndef _LINUX_TCP_H
+#define _LINUX_TCP_H
+
+
+#define HEADER_SIZE	64		/* maximum header size		*/
+
+
+struct tcphdr {
+	__u16	source;
+	__u16	dest;
+	__u32	seq;
+	__u32	ack_seq;
+#if defined(__i386__)
+	__u16	res1:4,
+		doff:4,
+		fin:1,
+		syn:1,
+		rst:1,
+		psh:1,
+		ack:1,
+		urg:1,
+		res2:2;
+#elif defined(__mc68000__)
+	__u16	res2:2,
+		urg:1,
+		ack:1,
+		psh:1,
+		rst:1,
+		syn:1,
+		fin:1,
+		doff:4,
+		res1:4;
+#elif defined(__MIPSEL__)
+	__u16	res1:4,
+		doff:4,
+		fin:1,
+		syn:1,
+		rst:1,
+		psh:1,
+		ack:1,
+		urg:1,
+		res2:2;
+#elif defined(__MIPSEB__)
+	__u16	res2:2,
+		urg:1,
+		ack:1,
+		psh:1,
+		rst:1,
+		syn:1,
+		fin:1,
+		doff:4,
+		res1:4;
+#elif defined(__alpha__)
+	__u16	res1:4,
+		doff:4,
+		fin:1,
+		syn:1,
+		rst:1,
+		psh:1,
+		ack:1,
+		urg:1,
+		res2:2;
+#elif defined(__sparc__)
+	__u16	res2:2,
+		urg:1,
+		ack:1,
+		psh:1,
+		rst:1,
+		syn:1,
+		fin:1,
+		doff:4,
+		res1:4;
+#else
+#error	"Adjust this structure for your cpu alignment rules"
+#endif
+	__u16	window;
+	__u16	check;
+	__u16	urg_ptr;
+};
+
+
+enum {
+  TCP_ESTABLISHED = 1,
+  TCP_SYN_SENT,
+  TCP_SYN_RECV,
+  TCP_FIN_WAIT1,
+  TCP_FIN_WAIT2,
+  TCP_TIME_WAIT,
+  TCP_CLOSE,
+  TCP_CLOSE_WAIT,
+  TCP_LAST_ACK,
+  TCP_LISTEN,
+  TCP_CLOSING	/* now a valid state */
+};
+
+#endif	/* _LINUX_TCP_H */
diff --git a/pfinet/linux/termios.h b/pfinet/linux/termios.h
new file mode 100644
index 00000000..9e269565
--- /dev/null
+++ b/pfinet/linux/termios.h
@@ -0,0 +1 @@
+#include <termios.h>
diff --git a/pfinet/linux/time.h b/pfinet/linux/time.h
new file mode 100644
index 00000000..50e13783
--- /dev/null
+++ b/pfinet/linux/time.h
@@ -0,0 +1,13 @@
+#ifndef _HACK_TIME_H_
+#define _HACK_TIME_H_
+
+#include <sys/time.h>
+#include "mapped-time.h"
+
+extern inline void
+do_gettimeofday (struct timeval *tp)	/* Store the mapped time in *TP.  */
+{
+  maptime_read (mapped_time, tp);	/* Write the caller's buffer, not a global.  */
+}
+
+#endif
diff --git a/pfinet/linux/timer.h b/pfinet/linux/timer.h
new file mode 100644
index 00000000..2458746e
--- /dev/null
+++ b/pfinet/linux/timer.h
@@ -0,0 +1,27 @@
+#ifndef 
_HACK_TIMER_H_ +#define _HACK_TIMER_H_ + +#include <cthreads.h> + +enum tstate +{ + TIMER_INACTIVE, + TIMER_STARTING, + TIMER_STARTED, + TIMER_EXPIRED, + TIMER_FUNCTION_RUNNING, +}; + +struct timer_list +{ + struct timer_list *next, **prevp; + unsigned long expires; + unsigned long data; + void (*function)(unsigned long); +}; + +void add_timer (struct timer_list *); +int del_timer (struct timer_list *); +void init_timer (struct timer_list *); + +#endif diff --git a/pfinet/linux/types.h b/pfinet/linux/types.h new file mode 100644 index 00000000..c978fb07 --- /dev/null +++ b/pfinet/linux/types.h @@ -0,0 +1,16 @@ +#ifndef _HACK_TYPES_H +#define _HACK_TYPES_H + +#include <sys/types.h> +typedef unsigned char __u8; +typedef unsigned short __u16; +typedef unsigned long __u32; + +/* Hackery */ +struct inode +{ + uid_t i_uid; +}; + + +#endif diff --git a/pfinet/linux/udp.h b/pfinet/linux/udp.h new file mode 100644 index 00000000..471301a2 --- /dev/null +++ b/pfinet/linux/udp.h @@ -0,0 +1,29 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the UDP protocol. + * + * Version: @(#)udp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _LINUX_UDP_H +#define _LINUX_UDP_H + + +struct udphdr { + unsigned short source; + unsigned short dest; + unsigned short len; + unsigned short check; +}; + + +#endif /* _LINUX_UDP_H */ diff --git a/pfinet/linux/un.h b/pfinet/linux/un.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/pfinet/linux/un.h diff --git a/pfinet/linux/wait.h b/pfinet/linux/wait.h new file mode 100644 index 00000000..15759ad2 --- /dev/null +++ b/pfinet/linux/wait.h @@ -0,0 +1,23 @@ +#ifndef _HACK_WAIT_H_ +#define _HACK_WAIT_H_ + +#include <cthreads.h> + +struct wait_queue +{ + struct condition c; +}; + +struct select_table_elt +{ + struct condition *dependent_condition; + struct select_table_elt *next; +}; + +typedef struct select_table_struct +{ + struct condition master_condition; + struct select_table_elt *head; +} select_table; + +#endif diff --git a/pfinet/loopback.c b/pfinet/loopback.c new file mode 100644 index 00000000..0d3681fb --- /dev/null +++ b/pfinet/loopback.c @@ -0,0 +1,96 @@ +/* Loopback "device" for pfinet + Copyright (C) 1996, 1998 Free Software Foundation, Inc. + Written by Thomas Bushnell, n/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/
+
+#include <linux/netdevice.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "pfinet.h"
+
+struct device loopback_dev;
+
+int
+loopback_xmit (struct sk_buff *skb, struct device *dev)
+{
+  int done;
+
+  if (!skb || !dev)
+    return 0;
+
+  if (dev->tbusy)		/* Already transmitting: report busy.  */
+    return 1;
+
+  dev->tbusy = 1;		/* Claim the transmitter; released below.  */
+
+  done = dev_rint (skb->data, skb->len, 0, dev);
+  dev_kfree_skb (skb, FREE_WRITE);
+
+  while (done != 1)
+    done = dev_rint (0, 0, 0, dev);
+
+  dev->tbusy = 0;
+  return 0;
+}
+
+u_int16_t
+loopback_type_trans (struct sk_buff *skb, struct device *dev)
+{
+  return htons (ETH_P_IP);
+}
+
+
+void
+setup_loopback_device (char *name)
+{
+  int i;
+
+  loopback_dev.name = name;
+  for (i = 0; i < DEV_NUMBUFFS; i++)
+    skb_queue_head_init (&loopback_dev.buffs[i]);
+
+  loopback_dev.open = 0;
+  loopback_dev.stop = 0;
+  loopback_dev.hard_start_xmit = loopback_xmit;
+  loopback_dev.hard_header = 0;
+  loopback_dev.rebuild_header = 0;
+  loopback_dev.type_trans = loopback_type_trans;
+  loopback_dev.get_stats = 0;
+  loopback_dev.set_multicast_list = 0;
+
+  loopback_dev.type = 0;
+  loopback_dev.addr_len = 0;
+  loopback_dev.flags = IFF_LOOPBACK | IFF_BROADCAST | IFF_UP;
+  loopback_dev.family = AF_INET;
+
+  loopback_dev.mtu = 2000;
+
+  /* Defaults */
+  loopback_dev.pa_addr = inet_addr ("127.0.0.1");
+  loopback_dev.pa_brdaddr = inet_addr ("127.255.255.255");
+  loopback_dev.pa_mask = inet_addr ("255.0.0.0");
+  loopback_dev.pa_alen = sizeof (unsigned long);
+
+  loopback_dev.next = dev_base;
+  dev_base = &loopback_dev;
+
+  /* Add the route */
+  ip_rt_add (RTF_HOST, loopback_dev.pa_addr, 0xffffffff, 0, &loopback_dev,
+	     loopback_dev.mtu, 0);
+}
diff --git a/pfinet/main.c b/pfinet/main.c
new file mode 100644
index 00000000..0232dd66
--- /dev/null
+++ b/pfinet/main.c
@@ -0,0 +1,254 @@
+/*
+   Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+ + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include "pfinet.h" +#include <unistd.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <error.h> +#include <argp.h> +#include <hurd/startup.h> +#include <string.h> + +int trivfs_fstype = FSTYPE_MISC; +int trivfs_fsid; +int trivfs_support_read = 0; +int trivfs_support_write = 0; +int trivfs_support_exec = 0; +int trivfs_allow_open = 0; +struct port_class *trivfs_protid_portclasses[1]; +int trivfs_protid_nportclasses = 1; +struct port_class *trivfs_cntl_portclasses[1]; +int trivfs_cntl_nportclasses = 1; + +struct port_class *shutdown_notify_class; + +/* Option parser. */ +extern struct argp pfinet_argp; + +int +pfinet_demuxer (mach_msg_header_t *inp, + mach_msg_header_t *outp) +{ + extern int io_server (mach_msg_header_t *, mach_msg_header_t *); + extern int socket_server (mach_msg_header_t *, mach_msg_header_t *); + extern int startup_notify_server (mach_msg_header_t *, mach_msg_header_t *); + + return (io_server (inp, outp) + || socket_server (inp, outp) + || trivfs_demuxer (inp, outp) + || startup_notify_server (inp, outp)); +} + +/* The system is going down; destroy all the extant port rights. That + will cause net channels and such to close promptly. 
*/
+error_t
+S_startup_dosync (mach_port_t handle)
+{
+  struct port_info *inpi = ports_lookup_port (pfinet_bucket, handle,
+					      shutdown_notify_class);
+
+  if (!inpi)
+    return EOPNOTSUPP;
+
+  ports_class_iterate (socketport_class, ports_destroy_right);
+  return 0;
+}
+
+void
+sigterm_handler (int signo)
+{
+  ports_class_iterate (socketport_class, ports_destroy_right);
+  sleep (10);
+  signal (SIGTERM, SIG_DFL);
+  raise (SIGTERM);
+}
+
+void
+arrange_shutdown_notification ()
+{
+  error_t err;
+  mach_port_t initport, notify;
+  process_t procserver;
+  struct port_info *pi;
+
+  shutdown_notify_class = ports_create_class (0, 0);
+
+  signal (SIGTERM, sigterm_handler);
+
+  /* Arrange to get notified when the system goes down,
+     but if we fail for some reason, just silently give up.  No big deal. */
+
+  err = ports_create_port (shutdown_notify_class, pfinet_bucket,
+			   sizeof (struct port_info), &pi);
+  if (err)
+    return;
+
+  procserver = getproc ();
+  if (!procserver)
+    return;
+
+  err = proc_getmsgport (procserver, 1, &initport);
+  mach_port_deallocate (mach_task_self (), procserver);
+  if (err)
+    return;
+
+  notify = ports_get_right (pi);
+  ports_port_deref (pi);
+  startup_request_notification (initport, notify,
+				MACH_MSG_TYPE_MAKE_SEND,
+				program_invocation_short_name);
+  mach_port_deallocate (mach_task_self (), initport);
+}
+
+static char *already_open = 0;
+
+/* Return in *DEVICE the open device called NAME.  If NAME is 0 and there is
+   a single active device, that one is returned; otherwise an error.  *DEVICE
+   is written only, never read.  XXX hacky single-interface version.  */
+error_t
+find_device (char *name, struct device **device)
+{
+  if (already_open)
+    if (!name || strcmp (already_open, name) == 0)
+      {
+	*device = &ether_dev;
+	return 0;
+      }
+    else
+      return EBUSY;		/* XXXACK */
+  else if (! name)
+    return ENXIO;		/* XXX */
+
+  name = already_open = strdup (name);
+
+  setup_ethernet_device (name);
+
+  /* Default mask is 255.255.255.0.  XXX should be class dependent.  */
+  {
+    /* Same call setup_loopback_device uses for its pa_mask.  */
+    ether_dev.pa_mask = inet_addr ("255.255.255.0");
+  }
+
+  /* Turn on device. */
+  dev_open (&ether_dev);
+
+  *device = &ether_dev;
+
+  return 0;
+}
+
+/* Call FUN with each active device.  If a call to FUN returns a
+   non-zero value, this function will return immediately.  Otherwise 0 is
+   returned.
+   XXX hacky single-interface version. */
+error_t
+enumerate_devices (error_t (*fun) (struct device *dev))
+{
+  if (already_open)
+    return (*fun) (&ether_dev);
+  else
+    return 0;
+}
+
+int
+main (int argc,
+      char **argv)
+{
+  error_t err;
+  mach_port_t bootstrap;
+
+  pfinet_bucket = ports_create_bucket ();
+  trivfs_protid_portclasses[0] = ports_create_class (trivfs_clean_protid, 0);
+  trivfs_cntl_portclasses[0] = ports_create_class (trivfs_clean_cntl, 0);
+  addrport_class = ports_create_class (clean_addrport, 0);
+  socketport_class = ports_create_class (clean_socketport, 0);
+  trivfs_fsid = getpid ();
+  mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE,
+		      &fsys_identity);
+
+  /* Generic initialization */
+
+  init_devices ();
+  init_time ();
+  cthread_detach (cthread_fork (input_work_thread, 0));
+  inet_proto_init (0);
+
+  arrange_shutdown_notification ();
+
+  setup_loopback_device ("loopback");
+
+  /* Parse options.  */
+  argp_parse (&pfinet_argp, argc, argv, 0,0,0);
+
+  /* Talk to parent and link us in.  */
+  task_get_bootstrap_port (mach_task_self (), &bootstrap);
+  if (bootstrap == MACH_PORT_NULL)
+    error (1, 0, "Must be started as a translator");
+
+  err = trivfs_startup (bootstrap, 0,
+			trivfs_cntl_portclasses[0], pfinet_bucket,
+			trivfs_protid_portclasses[0], pfinet_bucket, 0);
+  if (err)
+    error (1, err, "contacting parent");	/* The code is in ERR, not errno.  */
+
+  /* Launch */
+  ports_manage_port_operations_multithread (pfinet_bucket,
+					    pfinet_demuxer,
+					    0, 0, 0);
+  return 0;
+}
+
+void
+trivfs_modify_stat (struct trivfs_protid *cred,
+		    struct stat *st)
+{
+}
+
+error_t
+trivfs_goaway (struct trivfs_control *cntl, int flags)
+{
+  if (flags & FSYS_GOAWAY_FORCE)
+    exit (0);
+  else
+    {
+      /* Stop new requests.  */
+      ports_inhibit_class_rpcs (trivfs_cntl_portclasses[0]);
+      ports_inhibit_class_rpcs (trivfs_protid_portclasses[0]);
+      ports_inhibit_class_rpcs (socketport_class);
+
+      if (ports_count_class (socketport_class) != 0)
+	{
+	  /* We won't go away, so start things going again...  */
+	  ports_enable_class (socketport_class);
+	  ports_resume_class_rpcs (trivfs_cntl_portclasses[0]);
+	  ports_resume_class_rpcs (trivfs_protid_portclasses[0]);
+
+	  return EBUSY;
+	}
+
+      /* There are no sockets, so we can die without breaking anybody
+	 too badly.  We don't let user ports on the /servers/socket/2
+	 file keep us alive because those get cached in every process
+	 that ever makes a PF_INET socket, libc copes with getting
+	 MACH_SEND_INVALID_DEST and looking up the new translator.  
*/ + exit (0); + } +} diff --git a/pfinet/mapped-time.h b/pfinet/mapped-time.h new file mode 100644 index 00000000..bcbfc6d4 --- /dev/null +++ b/pfinet/mapped-time.h @@ -0,0 +1,30 @@ +#ifndef _MAPPED_TIME_H_ +#define _MAPPED_TIME_H_ + +#include <maptime.h> + +#define HZ 100 + +extern volatile struct mapped_time_value *mapped_time; +extern long long root_jiffies; + +extern inline int +read_mapped_secs () +{ + return mapped_time->seconds; +} + +extern inline int +fetch_jiffies () +{ + struct timeval tv; + long long j; + + maptime_read (mapped_time, &tv); + + j = (long long) tv.tv_sec * HZ + ((long long) tv.tv_usec * HZ) / 1000000; + return j - root_jiffies; +} + + +#endif diff --git a/pfinet/misc.c b/pfinet/misc.c new file mode 100644 index 00000000..6eb96b89 --- /dev/null +++ b/pfinet/misc.c @@ -0,0 +1,189 @@ +/* + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include "pfinet.h" +#include <string.h> + +/* Create a sock_user structure, initialized from SOCK and ISROOT. + If NOINSTALL is set, don't put it in the portset. 
*/ +struct sock_user * +make_sock_user (struct socket *sock, int isroot, int noinstall) +{ + struct sock_user *user; + + if (noinstall) + errno = ports_create_port_noinstall (socketport_class, pfinet_bucket, + sizeof (struct sock_user), &user); + else + errno = ports_create_port (socketport_class, pfinet_bucket, + sizeof (struct sock_user), &user); + if (errno) + return 0; + + user->isroot = isroot; + user->sock = sock; + sock->refcnt++; + return user; +} + +/* Create a sockaddr port. Fill in *ADDR and *ADDRTYPE accordingly. + The address should come from SOCK; PEER is 0 if we want this socket's + name and 1 if we want the peer's name. */ +error_t +make_sockaddr_port (struct socket *sock, + int peer, + mach_port_t *addr, + mach_msg_type_name_t *addrtype) +{ + char buf[128]; + int buflen = 128; + error_t err; + struct sock_addr *addrstruct; + + err = (*sock->ops->getname) (sock, (struct sockaddr *)buf, &buflen, peer); + if (err) + return err; + + err = ports_create_port (addrport_class, pfinet_bucket, + sizeof (struct sock_addr) + buflen, &addrstruct); + if (err) + return err; + addrstruct->len = buflen; + bcopy (buf, addrstruct->address, buflen); + *addr = ports_get_right (addrstruct); + *addrtype = MACH_MSG_TYPE_MAKE_SEND; + ports_port_deref (addrstruct); + return 0; +} + +struct sock_user * +begin_using_socket_port (mach_port_t port) +{ + return ports_lookup_port (pfinet_bucket, port, socketport_class); +} + +void +end_using_socket_port (struct sock_user *user) +{ + if (user) + ports_port_deref (user); +} + +struct sock_addr * +begin_using_sockaddr_port (mach_port_t port) +{ + return ports_lookup_port (pfinet_bucket, port, addrport_class); +} + +void +end_using_sockaddr_port (struct sock_addr *addr) +{ + if (addr) + ports_port_deref (addr); +} + +/* Nothing need be done here. */ +void +clean_addrport (void *arg) +{ +} + +/* Release the reference on the referenced socket. 
*/ +void +clean_socketport (void *arg) +{ + struct sock_user *user = arg; + + mutex_lock (&global_lock); + + user->sock->refcnt--; + if (user->sock->refcnt == 0) + sock_release (user->sock); + + mutex_unlock (&global_lock); +} + +struct socket * +sock_alloc (void) +{ + struct socket *sock; + struct wait_queue *wait, **waitp; + + sock = malloc (sizeof (struct wait_queue) + + sizeof (struct wait_queue *) + + sizeof (struct socket)); + wait = (void *)sock + sizeof (struct socket); + waitp = (void *)wait + sizeof (struct wait_queue); + + bzero (sock, sizeof (struct socket)); + sock->identity = MACH_PORT_NULL; + sock->state = SS_UNCONNECTED; + sock->wait = waitp; + + condition_init (&wait->c); + + *waitp = wait; + + return sock; +} + +static inline void sock_release_peer(struct socket *peer) +{ + peer->state = SS_DISCONNECTING; + wake_up_interruptible(peer->wait); + sock_wake_async(peer, 1); +} + +void +sock_release (struct socket *sock) +{ + int oldstate; + struct socket *peersock, *nextsock; + + if ((oldstate = sock->state) != SS_UNCONNECTED) + sock->state = SS_DISCONNECTING; + + /* + * Wake up anyone waiting for connections. + */ + + for (peersock = sock->iconn; peersock; peersock = nextsock) + { + nextsock = peersock->next; + sock_release_peer(peersock); + } + + /* + * Wake up anyone we're connected to. First, we release the + * protocol, to give it a chance to flush data, etc. + */ + + peersock = (oldstate == SS_CONNECTED) ? sock->conn : NULL; + if (sock->ops) + (*sock->ops->release) (sock, peersock); + if (peersock) + sock_release_peer(peersock); + + if (sock->identity != MACH_PORT_NULL) + mach_port_destroy (mach_task_self (), sock->identity); + free (sock); +} + + diff --git a/pfinet/mutations.h b/pfinet/mutations.h new file mode 100644 index 00000000..8c0df887 --- /dev/null +++ b/pfinet/mutations.h @@ -0,0 +1,38 @@ +/* + Copyright (C) 1995 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. 
+ + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +/* Only CPP macro definitions should go in this file. */ + +#define IO_SELECT_REPLY_PORT + +#define IO_INTRAN sock_user_t begin_using_socket_port (io_t) +#define IO_DESTRUCTOR end_using_socket_port (sock_user_t) + +#define SOCKET_INTRAN sock_user_t begin_using_socket_port (socket_t) +#define SOCKET_DESTRUCTOR end_using_socket_port (sock_user_t) + +#define ADDRPORT_INTRAN sock_addr_t begin_using_sockaddr_port (addr_port_t) +#define ADDRPORT_DESTRUCTOR end_using_sockaddr_port (sock_addr_t) + +#define PF_INTRAN trivfs_protid_t trivfs_begin_using_protid (pf_t) +#define PF_DESTRUCTOR trivfs_end_using_protid (trivfs_protid_t) + +#define IO_IMPORTS import "pfinet.h"; +#define SOCKET_IMPORTS import "pfinet.h"; diff --git a/pfinet/options.c b/pfinet/options.c new file mode 100644 index 00000000..b308dc36 --- /dev/null +++ b/pfinet/options.c @@ -0,0 +1,302 @@ +/* Pfinet option parsing + + Copyright (C) 1996, 1997 Free Software Foundation, Inc. + + Written by Miles Bader <miles@gnu.ai.mit.edu> + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. 
+ + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <stdlib.h> +#include <string.h> +#include <hurd.h> +#include <argp.h> +#include <argz.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "pfinet.h" + +/* Our interface to the set of devices. */ +extern error_t find_device (char *name, struct device **device); +extern error_t enumerate_devices (error_t (*fun) (struct device *dev)); + +/* Pfinet options. Used for both startup and runtime. */ +static const struct argp_option options[] = +{ + {"interface", 'i', "DEVICE", 0, "Network interface to use", 1}, + {0,0,0,0,"These apply to a given interface:", 2}, + {"address", 'a', "ADDRESS", 0, "Set the network address"}, + {"netmask", 'm', "MASK", 0, "Set the netmask"}, + {"gateway", 'g', "ADDRESS", 0, "Set the default gateway"}, + {"shutdown", 's', 0, 0, "Shut it down"}, + {0} +}; + +static const char doc[] = "Interface-specific options before the first \ +interface specification apply to the first following interface; otherwise \ +they apply to the previously specified interface."; + +/* Used to describe a particular interface during argument parsing. */ +struct parse_interface +{ + /* The network interface in question. */ + struct device *device; + + /* New values to apply to it. */ + unsigned long address, netmask, gateway; +}; + +/* Used to hold data during argument parsing. */ +struct parse_hook +{ + /* A list of specified interfaces and their corresponding options. */ + struct parse_interface *interfaces; + size_t num_interfaces; + + /* Interface to which options apply. 
If the device field isn't filled in + then it should be by the next --interface option. */ + struct parse_interface *curint; +}; + +/* Adds an empty interface slot to H, and sets H's current interface to it, or + returns an error. */ +static error_t +parse_hook_add_interface (struct parse_hook *h) +{ + struct parse_interface *new = + realloc (h->interfaces, + (h->num_interfaces + 1) * sizeof (struct parse_interface)); + if (! new) + return ENOMEM; + h->interfaces = new; + h->num_interfaces++; + h->curint = new + h->num_interfaces - 1; + h->curint->device = 0; + h->curint->address = INADDR_NONE; + h->curint->netmask = INADDR_NONE; + h->curint->gateway = INADDR_NONE; + return 0; +} + +static error_t +parse_opt (int opt, char *arg, struct argp_state *state) +{ + error_t err = 0; + struct parse_hook *h = state->hook; + + /* Return _ERR from this routine, and in the special case of OPT being + ARGP_KEY_SUCCESS, remember to free H first. */ +#define RETURN(_err) \ + do { if (opt == ARGP_KEY_SUCCESS) \ + { err = (_err); goto free_hook; } \ + else \ + return _err; } while (0) + + /* Print a parsing error message and (if exiting is turned off) return the + error code ERR. */ +#define PERR(err, fmt, args...) \ + do { argp_error (state, fmt , ##args); RETURN (err); } while (0) + + /* Like PERR but for non-parsing errors. */ +#define FAIL(rerr, status, perr, fmt, args...) \ + do{ argp_failure (state, status, perr, fmt , ##args); RETURN (rerr); } while(0) + + /* Parse STR and return the corresponding internet address. If STR is not + a valid internet address, signal an error mentioned TYPE. */ +#define ADDR(str, type) \ + ({ unsigned long addr = inet_addr (str); \ + if (addr == INADDR_NONE) PERR (EINVAL, "Malformed %s", type); \ + addr; }) + + switch (opt) + { + struct parse_interface *in; + + case 'i': + /* An interface. */ + err = 0; + if (h->curint->device) + /* The current interface slot is not available. 
*/ + { + /* First see if a previously specified one is being re-specified. */ + for (in = h->interfaces; in < h->interfaces + h->num_interfaces; in++) + if (strcmp (in->device->name, arg) == 0) + /* Re-use an old slot. */ + { + h->curint = in; + return 0; + } + + /* Add a new interface entry. */ + err = parse_hook_add_interface (h); + } + in = h->curint; + + if (! err) + err = find_device (arg, &in->device); + if (err) + FAIL (err, 10, err, "%s", arg); + + break; + + case 'a': + h->curint->address = ADDR (arg, "address"); + if (!IN_CLASSA (ntohl (h->curint->address)) + && !IN_CLASSB (ntohl (h->curint->address)) + && !IN_CLASSC (ntohl (h->curint->address))) + { + if (IN_MULTICAST (ntohl (h->curint->address))) + FAIL (EINVAL, 1, 0, + "%s: Cannot set interface address to multicast address", + arg); + else + FAIL (EINVAL, 1, 0, + "%s: Illegal or undefined network address", arg); + } + break; + case 'm': + h->curint->netmask = ADDR (arg, "netmask"); break; + case 'g': + h->curint->gateway = ADDR (arg, "gateway"); break; + + case ARGP_KEY_INIT: + /* Initialize our parsing state. */ + h = malloc (sizeof (struct parse_hook)); + if (! h) + FAIL (ENOMEM, 11, ENOMEM, "option parsing"); + + h->interfaces = 0; + h->num_interfaces = 0; + err = parse_hook_add_interface (h); + if (err) + FAIL (err, 12, err, "option parsing"); + + state->hook = h; + break; + + case ARGP_KEY_SUCCESS: + in = h->curint; + if (! in->device) + /* No specific interface specified; is that ok? */ + if (in->address != INADDR_NONE || in->netmask != INADDR_NONE + || in->gateway != INADDR_NONE) + /* Some options were specified, so we need an interface. See if + there's a single extant interface to use as a default. */ + { + err = find_device (0, &in->device); + if (err) + FAIL (err, 13, 0, "No default interface"); + } + + /* Check for bogus option combinations. 
*/ + for (in = h->interfaces; in < h->interfaces + h->num_interfaces; in++) + if (in->netmask != INADDR_NONE + && in->address == INADDR_NONE && in->device->pa_addr == 0) + /* Specifying a netmask for an address-less interface is a no-no. */ + FAIL (EDESTADDRREQ, 14, 0, "Cannot set netmask"); + + /* Successfully finished parsing, return a result. */ + for (in = h->interfaces; in < h->interfaces + h->num_interfaces; in++) + { + struct device *dev = in->device; + if (in->address != INADDR_NONE || in->netmask != INADDR_NONE) + { + if (dev->pa_addr != 0) + /* There's already an address, delete the old entry. */ + ip_rt_del (dev->pa_addr & dev->pa_mask, dev); + + if (in->address != INADDR_NONE) + dev->pa_addr = in->address; + + if (in->netmask != INADDR_NONE) + dev->pa_mask = in->netmask; + else + { + if (IN_CLASSA (ntohl (dev->pa_addr))) + dev->pa_mask = htonl (IN_CLASSA_NET); + else if (IN_CLASSB (ntohl (dev->pa_addr))) + dev->pa_mask = htonl (IN_CLASSB_NET); + else if (IN_CLASSC (ntohl (dev->pa_addr))) + dev->pa_mask = htonl (IN_CLASSC_NET); + else + abort (); + } + + dev->family = AF_INET; + dev->pa_brdaddr = dev->pa_addr | ~dev->pa_mask; + + ip_rt_add (0, dev->pa_addr & dev->pa_mask, dev->pa_mask, + 0, dev, 0, 0); + } + if (in->gateway != INADDR_NONE) + { + ip_rt_del (0, dev); + ip_rt_add (RTF_GATEWAY, 0, 0, in->gateway, dev, 0, 0); + } + } + /* Fall through to free hook. */ + + case ARGP_KEY_ERROR: + /* Parsing error occured, free everything. */ + free_hook: + free (h->interfaces); + free (h); + break; + + default: + return ARGP_ERR_UNKNOWN; + } + + return err; +} + +struct argp +pfinet_argp = { options, parse_opt, 0, doc }; + +struct argp *trivfs_runtime_argp = &pfinet_argp; + +error_t +trivfs_append_args (struct trivfs_control *fsys, char **argz, size_t *argz_len) +{ + error_t add_dev_opts (struct device *dev) + { + error_t err = 0; + +#define ADD_OPT(fmt, args...) \ + do { char buf[100]; \ + if (! 
err) { \ + snprintf (buf, sizeof buf, fmt , ##args); \ + err = argz_add (argz, argz_len, buf); } } while (0) +#define ADD_ADDR_OPT(name, addr) \ + do { struct in_addr i; \ + i.s_addr = (addr); \ + ADD_OPT ("--%s=%s", name, inet_ntoa (i)); } while (0) + + ADD_OPT ("--interface=%s", dev->name); + if (dev->pa_addr != 0) + ADD_ADDR_OPT ("address", dev->pa_addr); + if (dev->pa_mask != 0) + ADD_ADDR_OPT ("netmask", dev->pa_mask); + + /* XXX how do we figure out the default gateway? */ +#undef ADD_OPT + + return err; + } + + return enumerate_devices (add_dev_opts); +} diff --git a/pfinet/pfinet.h b/pfinet/pfinet.h new file mode 100644 index 00000000..36756615 --- /dev/null +++ b/pfinet/pfinet.h @@ -0,0 +1,93 @@ +/* + Copyright (C) 1995, 1996, 1999 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*/ + +#ifndef PFINET_H_ +#define PFINET_H_ + +#include <device/device.h> +#include <hurd/ports.h> +#include <linux/netdevice.h> +#include <hurd/trivfs.h> +#include <sys/mman.h> + +extern device_t master_device; + +extern struct proto_ops *proto_ops; + +struct mutex global_lock; +struct mutex packet_queue_lock; + +struct port_bucket *pfinet_bucket; +struct port_class *addrport_class; +struct port_class *socketport_class; + +mach_port_t fsys_identity; + +extern struct device ether_dev; +extern struct device loopback_dev; + +/* A port on SOCK. Multiple sock_user's can point to the same socket. */ +struct sock_user +{ + struct port_info pi; + int isroot; + struct socket *sock; +}; + +/* Socket address ports. */ +struct sock_addr +{ + struct port_info pi; + size_t len; + struct sockaddr address[0]; +}; + +void setup_loopback_device (char *); + +int ethernet_demuxer (mach_msg_header_t *, mach_msg_header_t *); +void setup_ethernet_device (char *); +void become_task_protid (struct trivfs_protid *); +void become_task (struct sock_user *); +struct sock_user *make_sock_user (struct socket *, int, int); +error_t make_sockaddr_port (struct socket *, int, + mach_port_t *, mach_msg_type_name_t *); +void init_devices (void); +any_t input_work_thread (any_t); +void init_time (void); +void inet_proto_init (struct net_proto *); +void ip_rt_add (short, u_long, u_long, u_long, struct device *, + u_short, u_long); +void ip_rt_del (u_long, struct device *); +int tcp_readable (struct sock *); + + +struct sock_user *begin_using_socket_port (socket_t); +struct sock_addr *begin_using_sockaddr_port (socket_t); +void end_using_socket_port (struct sock_user *); +void end_using_sockaddr_port (struct sock_addr *); +void clean_addrport (void *); +void clean_socketport (void *); + +/* MiG bogosity */ +typedef struct sock_user *sock_user_t; +typedef struct sock_addr *sock_addr_t; +typedef struct trivfs_protid *trivfs_protid_t; + +#endif diff --git a/pfinet/sched.c b/pfinet/sched.c new file mode 
100644 index 00000000..41059b3f --- /dev/null +++ b/pfinet/sched.c @@ -0,0 +1,87 @@ +/* + Copyright (C) 1995, 1996 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include <asm/system.h> +#include <linux/sched.h> +#include "pfinet.h" + +struct mutex global_lock = MUTEX_INITIALIZER; +struct mutex packet_queue_lock = MUTEX_INITIALIZER; + +struct task_struct current_contents; +struct task_struct *current = ¤t_contents; + +void +interruptible_sleep_on (struct wait_queue **p) +{ + int cancel; + + cancel = hurd_condition_wait (&(*p)->c, &global_lock); + if (cancel) + current->signal = 1; +} + +void +wake_up_interruptible (struct wait_queue **p) +{ + /* tcp.c uses an unitialized wait queue; don't bomb + if we see it. */ + if (*p) + condition_broadcast (&(*p)->c); +} + + +/* Wake up the owner of the SOCK. If HOW is zero, then just + send SIGIO. If HOW is one, then send SIGIO only if the + SO_WAITDATA flag is off. If HOW is two, then send SIGIO + only if the SO_NOSPACE flag is on, and also clear it. */ +int +sock_wake_async (struct socket *sock, int how) +{ + /* For now, do nothing. XXX */ + return 0; +} + + +/* Set the contents of current appropriately for an RPC being undertaken + by USER. 
*/ +void +become_task (struct sock_user *user) +{ + /* These fields are not really used currently. */ + current->pgrp = current->pid = 0; + + current->flags = 0; + current->timeout = 0; + current->signal = current->blocked = 0; + current->state = TASK_RUNNING; + current->isroot = user->isroot; +} + +void +become_task_protid (struct trivfs_protid *protid) +{ + current->pgrp = current->pid = 0; + current->flags = 0; + current->timeout = 0; + current->signal = current->blocked = 0; + current->state = TASK_RUNNING; + current->isroot = protid->isroot; +} diff --git a/pfinet/socket-ops.c b/pfinet/socket-ops.c new file mode 100644 index 00000000..3db4985f --- /dev/null +++ b/pfinet/socket-ops.c @@ -0,0 +1,526 @@ +/* Interface functions for the socket.defs interface. + Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc. + Written by Michael I. Bushnell, p/BSG. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include <hurd/trivfs.h> +#include <string.h> + +#include "pfinet.h" +#include "socket_S.h" + + +error_t +S_socket_create (struct trivfs_protid *master, + int sock_type, + int protocol, + mach_port_t *port, + mach_msg_type_name_t *porttype) +{ + struct sock_user *user; + struct socket *sock; + error_t err; + + if (!master) + return EOPNOTSUPP; + + /* Don't allow bogus SOCK_PACKET here. 
*/ + + if ((sock_type != SOCK_STREAM + && sock_type != SOCK_DGRAM + && sock_type != SOCK_SEQPACKET + && sock_type != SOCK_RAW) + || protocol < 0) + return EINVAL; + + mutex_lock (&global_lock); + + become_task_protid (master); + + sock = sock_alloc (); + + sock->type = sock_type; + sock->ops = proto_ops; + + err = - (*sock->ops->create) (sock, protocol); + if (err) + sock_release (sock); + else + { + user = make_sock_user (sock, master->isroot, 0); + *port = ports_get_right (user); + *porttype = MACH_MSG_TYPE_MAKE_SEND; + ports_port_deref (user); + } + + mutex_unlock (&global_lock); + + return err; +} + + +/* Listen on a socket. */ +error_t +S_socket_listen (struct sock_user *user, int queue_limit) +{ + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + + become_task (user); + + if (user->sock->state == SS_UNCONNECTED) + { + if (user->sock->ops && user->sock->ops->listen) + (*user->sock->ops->listen) (user->sock, queue_limit); + user->sock->flags |= SO_ACCEPTCON; + mutex_unlock (&global_lock); + return 0; + } + else + { + mutex_unlock (&global_lock); + return EINVAL; + } +} + +error_t +S_socket_accept (struct sock_user *user, + mach_port_t *new_port, + mach_msg_type_name_t *new_port_type, + mach_port_t *addr_port, + mach_msg_type_name_t *addr_port_type) +{ + struct sock_user *newuser; + struct socket *sock, *newsock; + error_t err; + + if (!user) + return EOPNOTSUPP; + + mutex_lock (&global_lock); + + become_task (user); + + sock = user->sock; + newsock = 0; + err = 0; + + if ((sock->state != SS_UNCONNECTED) + || (!(sock->flags & SO_ACCEPTCON))) + err = EINVAL; + else if (!(newsock = sock_alloc ())) + err = ENOMEM; + + if (err) + goto out; + + newsock->type = sock->type; + newsock->ops = sock->ops; + + err = - (*sock->ops->dup) (newsock, sock); + if (err) + goto out; + + err = - (*sock->ops->accept) (sock, newsock, sock->userflags); + if (err) + goto out; + + err = make_sockaddr_port (newsock, 1, addr_port, addr_port_type); + if (err) + goto out; + + 
newuser = make_sock_user (newsock, user->isroot, 0); + *new_port = ports_get_right (newuser); + *new_port_type = MACH_MSG_TYPE_MAKE_SEND; + ports_port_deref (newuser); + + out: + if (err && newsock) + sock_release (newsock); + mutex_unlock (&global_lock); + return err; +} + +error_t +S_socket_connect (struct sock_user *user, + struct sock_addr *addr) +{ + struct socket *sock; + error_t err; + + if (!user || !addr) + return EOPNOTSUPP; + + sock = user->sock; + + mutex_lock (&global_lock); + + become_task (user); + + err = 0; + + if (sock->state == SS_CONNECTED + && sock->type != SOCK_DGRAM) + err = EISCONN; + else if (sock->state != SS_UNCONNECTED + && sock->state != SS_CONNECTING + && sock->state != SS_CONNECTED) + err = EINVAL; + + if (!err) + err = - (*sock->ops->connect) (sock, addr->address, addr->len, + sock->userflags); + + mutex_unlock (&global_lock); + + /* MiG should do this for us, but it doesn't. */ + if (!err) + mach_port_deallocate (mach_task_self (), addr->pi.port_right); + + return err; +} + +error_t +S_socket_bind (struct sock_user *user, + struct sock_addr *addr) +{ + error_t err; + + if (!user) + return EOPNOTSUPP; + if (! addr) + return EADDRNOTAVAIL; + + mutex_lock (&global_lock); + become_task (user); + err = - (*user->sock->ops->bind) (user->sock, addr->address, addr->len); + mutex_unlock (&global_lock); + + /* MiG should do this for us, but it doesn't. 
*/
+  if (!err)
+    mach_port_deallocate (mach_task_self (), addr->pi.port_right);
+
+  return err;
+}
+
+/* Return in *ADDR_PORT / *ADDR_PORT_NAME an address port for USER's
+   socket, built by make_sockaddr_port with a second argument of 0
+   (presumably "own name" as opposed to "peer name" -- confirm against
+   make_sockaddr_port).  Returns EOPNOTSUPP if USER is null, otherwise
+   whatever make_sockaddr_port reports.  */
+error_t
+S_socket_name (struct sock_user *user,
+               mach_port_t *addr_port,
+               mach_msg_type_name_t *addr_port_name)
+{
+  error_t err;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+  /* Propagate the result, as S_socket_peername does, rather than
+     replying with success and a port that was never filled in.  */
+  err = make_sockaddr_port (user->sock, 0, addr_port, addr_port_name);
+  mutex_unlock (&global_lock);
+
+  return err;
+}
+
+/* Like S_socket_name, but calls make_sockaddr_port with a second
+   argument of 1 (presumably selecting the peer's address -- confirm).
+   Returns EOPNOTSUPP if USER is null, otherwise the result of
+   make_sockaddr_port.  */
+error_t
+S_socket_peername (struct sock_user *user,
+                   mach_port_t *addr_port,
+                   mach_msg_type_name_t *addr_port_name)
+{
+  error_t err;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+  err = make_sockaddr_port (user->sock, 1, addr_port, addr_port_name);
+  mutex_unlock (&global_lock);
+
+  return err;
+}
+
+/* Connect the sockets of USER1 and USER2 to each other using the
+   protocol's `socketpair' operation.
+
+   Returns EOPNOTSUPP if either user is null, EINVAL if the two sockets
+   have different types, EISCONN per the state test below, and otherwise
+   the negated return value of the socketpair operation.  On success the
+   two sockets are cross-linked through `conn', both are marked
+   SS_CONNECTED, and the right USER2->pi.port_right is deallocated.  */
+error_t
+S_socket_connect2 (struct sock_user *user1,
+                   struct sock_user *user2)
+{
+  error_t err;
+
+  if (!user1 || !user2)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+
+  become_task (user1);
+
+  /* NOTE(review): EISCONN is reported only when *both* sockets are
+     already out of SS_UNCONNECTED; if a single connected socket should
+     be rejected too, this `&&' wants to be `||' -- confirm before
+     changing.  */
+  if (user1->sock->type != user2->sock->type)
+    err = EINVAL;
+  else if (user1->sock->state != SS_UNCONNECTED
+           && user2->sock->state != SS_UNCONNECTED)
+    err = EISCONN;
+  else
+    err = - (*user1->sock->ops->socketpair) (user1->sock, user2->sock);
+
+  if (!err)
+    {
+      user1->sock->conn = user2->sock;
+      user2->sock->conn = user1->sock;
+      user1->sock->state = SS_CONNECTED;
+      user2->sock->state = SS_CONNECTED;
+    }
+
+  mutex_unlock (&global_lock);
+
+  /* MiG should do this for us, but it doesn't.  */
+  if (!err)
+    mach_port_deallocate (mach_task_self (), user2->pi.port_right);
+
+  return err;
+}
+
+/* Create an address port holding a copy of the DATA_LEN bytes at DATA.
+   Only SOCKADDR_TYPE == AF_INET is accepted; anything else yields
+   EAFNOSUPPORT.  The port object is created in `addrport_class' /
+   `pfinet_bucket' with DATA_LEN extra bytes after struct sock_addr.
+   On success *ADDR_PORT is the right from ports_get_right, to be sent
+   with disposition MACH_MSG_TYPE_MAKE_SEND, and 0 is returned; a
+   failure of ports_create_port is returned unchanged.  SERVER is not
+   used.  */
+error_t
+S_socket_create_address (mach_port_t server,
+                         int sockaddr_type,
+                         char *data,
+                         mach_msg_type_number_t data_len,
+                         mach_port_t *addr_port,
+                         mach_msg_type_name_t *addr_port_type)
+{
+  struct sock_addr *addr;
+  error_t err;
+
+  if (sockaddr_type != AF_INET)
+    return EAFNOSUPPORT;
+
+  err = ports_create_port (addrport_class, pfinet_bucket,
+                           sizeof (struct sock_addr) + data_len, &addr);
+  if (err)
+    return err;
+
+  addr->len = data_len;
+  bcopy (data, addr->address, data_len);
+
+  *addr_port = ports_get_right (addr);
+  *addr_port_type = MACH_MSG_TYPE_MAKE_SEND;
+  /* Drop the reference we hold on the new port object now that the
+     right has been fetched.  */
+  ports_port_deref (addr);
+  return 0;
+}
+
+/* Not implemented: always returns EOPNOTSUPP and touches none of its
+   arguments.  */
+error_t
+S_socket_fabricate_address (mach_port_t server,
+                            int sockaddr_type,
+                            mach_port_t *addr_port,
+                            mach_msg_type_name_t *addr_port_type)
+{
+  return EOPNOTSUPP;
+}
+
+/* Report the contents of address port ADDR: *TYPE is set to AF_INET and
+   the ADDR->len stored bytes are copied to *DATA, with *DATALEN set to
+   that length.  If the buffer passed in (*DATALEN bytes) is too small,
+   a fresh anonymous mapping of ADDR->len bytes is stored in *DATA
+   first.  Returns EOPNOTSUPP if ADDR is null, errno if the mapping
+   cannot be created, and 0 otherwise.  */
+error_t
+S_socket_whatis_address (struct sock_addr *addr,
+                         int *type,
+                         char **data,
+                         mach_msg_type_number_t *datalen)
+{
+  if (!addr)
+    return EOPNOTSUPP;
+
+  *type = AF_INET;
+  if (*datalen < addr->len)
+    {
+      *data = mmap (0, addr->len, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+      /* Without this check a failed mmap would make the bcopy below
+         write through MAP_FAILED.  */
+      if (*data == MAP_FAILED)
+        return errno;
+    }
+  bcopy (addr->address, *data, addr->len);
+  *datalen = addr->len;
+
+  return 0;
+}
+
+/* Invoke the protocol's `shutdown' operation on USER's socket with
+   DIRECTION and return its negated result.  Returns EOPNOTSUPP if USER
+   is null.  */
+error_t
+S_socket_shutdown (struct sock_user *user,
+                   int direction)
+{
+  error_t err;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+  err = - (*user->sock->ops->shutdown) (user->sock, direction);
+  mutex_unlock (&global_lock);
+
+  return err;
+}
+
+/* Fetch socket option OPTION at LEVEL for USER's socket through the
+   protocol's `getsockopt' operation, storing the value in the buffer at
+   *DATA and its size in *DATALEN.  Only int-sized options are handled:
+   *DATALEN is forced to sizeof (int) before the call and asserted to
+   still be that afterwards.  Returns EOPNOTSUPP if USER is null,
+   otherwise the negated result of the operation.
+   (Assumes the buffer at *DATA holds at least sizeof (int) bytes --
+   TODO confirm against the RPC stub.)  */
+error_t
+S_socket_getopt (struct sock_user *user,
+                 int level,
+                 int option,
+                 char **data,
+                 u_int *datalen)
+{
+  error_t err;
+
+  if (! user)
+    return EOPNOTSUPP;
+
+  /* XXX all options supported in the linux code are in fact ints.  */
+  *datalen = sizeof (int);
+
+  mutex_lock (&global_lock);
+  become_task (user);
+
+  err =
+    - (user->sock->ops->getsockopt)(user->sock, level, option, *data, datalen);
+
+  assert (*datalen == sizeof (int));
+
+  mutex_unlock (&global_lock);
+
+  return err;
+}
+
+/* Set socket option OPTION at LEVEL for USER's socket to the DATALEN
+   bytes at DATA through the protocol's `setsockopt' operation.  Returns
+   EOPNOTSUPP if USER is null, otherwise the negated result of the
+   operation.  */
+error_t
+S_socket_setopt (struct sock_user *user,
+                 int level,
+                 int option,
+                 char *data,
+                 u_int datalen)
+{
+  error_t err;
+
+  if (! user)
+    return EOPNOTSUPP;
+
+  mutex_lock (&global_lock);
+  become_task (user);
+
+  err =
+    - (user->sock->ops->setsockopt)(user->sock, level, option, data, datalen);
+
+  mutex_unlock (&global_lock);
+
+  return err;
+}
+
+/* Send the DATALEN bytes at DATA on USER's socket, to ADDR through the
+   `sendto' operation if ADDR is non-null and through `send' otherwise.
+   Passing ports or control data is not supported: a nonzero NPORTS or
+   CONTROLLEN yields EINVAL.  On success *AMOUNT is the value returned
+   by the operation and 0 is returned; a negative result from the
+   operation is negated and returned as the error.  Returns EOPNOTSUPP
+   if USER is null.  */
+error_t
+S_socket_send (struct sock_user *user,
+               struct sock_addr *addr,
+               int flags,
+               char *data,
+               u_int datalen,
+               mach_port_t *ports,
+               u_int nports,
+               char *control,
+               u_int controllen,
+               mach_msg_type_number_t *amount)
+{
+  int sent;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  /* Don't do this yet, it's too bizarre to think about right now.  */
+  if (nports != 0 || controllen != 0)
+    return EINVAL;
+
+  mutex_lock (&global_lock);
+
+  become_task (user);
+
+  if (addr)
+    sent = (*user->sock->ops->sendto) (user->sock, data, datalen,
+                                       user->sock->userflags, flags,
+                                       addr->address, addr->len);
+  else
+    sent = (*user->sock->ops->send) (user->sock, data, datalen,
+                                     user->sock->userflags, flags);
+
+  mutex_unlock (&global_lock);
+
+  /* MiG should do this for us, but it doesn't.  */
+  if (addr && sent >= 0)
+    mach_port_deallocate (mach_task_self (), addr->pi.port_right);
+
+  if (sent >= 0)
+    {
+      *amount = sent;
+      return 0;
+    }
+  else
+    return (error_t)-sent;
+}
+
+/* Receive up to AMOUNT bytes from USER's socket through the protocol's
+   `recvfrom' operation.
+
+   The data is placed in the buffer at *DATA if it holds at least AMOUNT
+   bytes (*DATALEN on entry); otherwise AMOUNT bytes are allocated with
+   vm_allocate, *DATA is pointed at them, and any whole pages beyond the
+   received length are released again.  *DATALEN is set to the number of
+   bytes received.  The sender's address is returned as a new address
+   port in *ADDRPORT / *ADDRPORTTYPE.  The ports/control part of the
+   interface is unused: *NPORTS, *CONTROLLEN and *OUTFLAGS are set to 0
+   and *PORTSTYPE to MACH_MSG_TYPE_COPY_SEND.
+
+   Returns EOPNOTSUPP if USER is null, the vm_allocate error if the
+   buffer cannot be allocated, the negated `recvfrom' result if that is
+   negative, and the S_socket_create_address error if the address port
+   cannot be made.  A buffer allocated here is released on every error
+   return.  */
+error_t
+S_socket_recv (struct sock_user *user,
+               mach_port_t *addrport,
+               mach_msg_type_name_t *addrporttype,
+               int flags,
+               char **data,
+               u_int *datalen,
+               mach_port_t **ports,
+               mach_msg_type_name_t *portstype,
+               u_int *nports,
+               char **control,
+               u_int *controllen,
+               int *outflags,
+               mach_msg_type_number_t amount)
+{
+  error_t err;
+  int recvd;
+  char addr[128];
+  size_t addrlen = sizeof addr;
+  int didalloc = 0;
+
+  if (!user)
+    return EOPNOTSUPP;
+
+  /* For unused recvmsg interface */
+  *nports = 0;
+  *portstype = MACH_MSG_TYPE_COPY_SEND;
+  *controllen = 0;
+  *outflags = 0;
+
+  /* Instead of this, we should peek at the socket and only allocate
+     as much as necessary.  */
+  if (*datalen < amount)
+    {
+      err = vm_allocate (mach_task_self (), (vm_address_t *) data, amount, 1);
+      if (err)
+        return err;
+      didalloc = 1;
+    }
+
+  mutex_lock (&global_lock);
+  become_task (user);
+
+  recvd = (*user->sock->ops->recvfrom) (user->sock, *data, amount,
+                                        user->sock->userflags, flags,
+                                        (struct sockaddr *)addr, &addrlen);
+
+  mutex_unlock (&global_lock);
+
+  if (recvd < 0)
+    {
+      /* Nothing will be handed back, so do not leak our buffer.  */
+      if (didalloc)
+        vm_deallocate (mach_task_self (), (vm_address_t) *data, amount);
+      return (error_t)-recvd;
+    }
+
+  *datalen = recvd;
+
+  /* Give back the whole pages past the end of the received data.  */
+  if (didalloc && round_page (*datalen) < round_page (amount))
+    vm_deallocate (mach_task_self (),
+                   (vm_address_t) (*data + round_page (*datalen)),
+                   round_page (amount) - round_page (*datalen));
+
+  err = S_socket_create_address (0, AF_INET, addr, addrlen, addrport,
+                                 addrporttype);
+  if (err)
+    {
+      /* Do not reply with success and an uninitialized address port;
+         release whatever is left of our buffer and report the error.  */
+      if (didalloc && round_page (*datalen) != 0)
+        vm_deallocate (mach_task_self (), (vm_address_t) *data,
+                       round_page (*datalen));
+      return err;
+    }
+
+  return 0;
+}
diff --git a/pfinet/socket.c b/pfinet/socket.c
new file mode 100644
index 00000000..7cbc0e9f
--- /dev/null
+++ b/pfinet/socket.c
@@ -0,0 +1,35 @@
+/*
+   Copyright (C) 1995 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include <linux/net.h>
+#include <assert.h>
+#include "pfinet.h"
+
+struct proto_ops *proto_ops; /* Operations table last passed to sock_register.  */
+
+/* Notice that a protocol family is live; this only works for inet here.
+   FAMILY must be PF_INET (checked with assert).  OPS is stored in the
+   global `proto_ops', replacing any previous value.  Always returns 0.  */
+int
+sock_register (int family, struct proto_ops *ops)
+{
+  assert (family == PF_INET);
+  proto_ops = ops;
+  return 0;
+}
+
diff --git a/pfinet/time.c b/pfinet/time.c
new file mode 100644
index 00000000..13f53cb7
--- /dev/null
+++ b/pfinet/time.c
@@ -0,0 +1,27 @@
+/* Time management functions
+   Copyright (C) 1995 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*/
+
+#include <sys/time.h>
+
+/* Store the current time of day in *TP by calling gettimeofday with a
+   null second argument.  */
+void
+do_gettimeofday (struct timeval *tp)
+{
+  gettimeofday (tp, 0);
+}
diff --git a/pfinet/timer-emul.c b/pfinet/timer-emul.c
new file mode 100644
index 00000000..d0842787
--- /dev/null
+++ b/pfinet/timer-emul.c
@@ -0,0 +1,161 @@
+/*
+   Copyright (C) 1995,96,2000 Free Software Foundation, Inc.
+   Written by Michael I. Bushnell, p/BSG.
+
+   This file is part of the GNU Hurd.
+
+   The GNU Hurd is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   The GNU Hurd is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <linux/sched.h>
+#include <error.h>
+#include <string.h>
+#include "pfinet.h"
+
+/* Time of day at init_time, expressed in units of 1/HZ seconds.  */
+long long root_jiffies;
+/* Mapping of the time device established by init_time.  */
+volatile struct mapped_time_value *mapped_time;
+
+/* Pending timers, kept sorted by increasing `expires' (see add_timer).
+   Each entry's `prevp' points at the link that points to it.  */
+struct timer_list *timers;
+/* Thread port of the thread running timer_function; 0 until that
+   thread has started.  */
+thread_t timer_thread = 0;
+
+/* Body of the timer thread; never returns.  The argument is unused.
+
+   Records its own thread port in `timer_thread', then loops: work out
+   how many milliseconds remain until the first entry of `timers'
+   expires, block in mach_msg on a private reply port for that long (or
+   without a timeout when no timer is queued), and afterwards unlink and
+   run every timer at the head of the list whose expiry lies before the
+   current `jiffies'.  `global_lock' is held throughout except while
+   blocked in mach_msg, so timer functions are called with it held.  */
+static int
+timer_function (int this_is_a_pointless_variable_with_a_rather_long_name)
+{
+  mach_port_t recv;
+  int wait = 0;
+
+  recv = mach_reply_port ();
+
+  timer_thread = mach_thread_self ();
+
+  mutex_lock (&global_lock);
+  while (1)
+    {
+      int jiff = jiffies;
+
+      /* -1 means "no timeout"; it is turned into a receive without
+         MACH_RCV_TIMEOUT below.  */
+      if (!timers)
+        wait = -1;
+      else if (timers->expires < jiff)
+        wait = 0;
+      else
+        wait = ((timers->expires - jiff) * 1000) / HZ;
+
+      mutex_unlock (&global_lock);
+
+      /* The receive is interruptible so that add_timer can cut the wait
+         short with thread_abort when a new timer becomes the earliest.  */
+      mach_msg (NULL, (MACH_RCV_MSG | MACH_RCV_INTERRUPT
+                       | (wait == -1 ? 0 : MACH_RCV_TIMEOUT)),
+                0, 0, recv, wait, MACH_PORT_NULL);
+
+      mutex_lock (&global_lock);
+
+      /* The list can be empty here: either we were woken with nothing
+         queued, or the previous iteration just ran the last timer.
+         Check before looking at the head.  */
+      while (timers && timers->expires < jiffies)
+        {
+          struct timer_list *tp;
+
+          tp = timers;
+
+          timers = timers->next;
+          if (timers)
+            timers->prevp = &timers;
+
+          /* Mark TP as unlinked before calling it, so that del_timer on
+             it is a no-op and it may be re-added.  */
+          tp->next = 0;
+          tp->prevp = 0;
+
+          (*tp->function) (tp->data);
+        }
+    }
+}
+
+
+/* Queue TIMER.  On entry TIMER->expires is an interval; it is turned
+   into an absolute expiry by adding the current `jiffies'.  TIMER is
+   inserted in front of the first queued timer that expires later, or
+   appended if there is none.  If TIMER ends up first in the list, the
+   timer thread is interrupted (unless we are the timer thread) so that
+   it recomputes how long to sleep.  */
+void
+add_timer (struct timer_list *timer)
+{
+  struct timer_list **tp;
+
+  timer->expires += jiffies;
+
+  for (tp = &timers; *tp; tp = &(*tp)->next)
+    if ((*tp)->expires > timer->expires)
+      {
+        timer->next = *tp;
+        timer->next->prevp = &timer->next;
+        timer->prevp = tp;
+        *tp = timer;
+        break;
+      }
+  /* *TP is null only if the loop ran off the end without inserting.  */
+  if (!*tp)
+    {
+      timer->next = 0;
+      timer->prevp = tp;
+      *tp = timer;
+    }
+
+  if (timers == timer)
+    {
+      /* We have changed the first one, so tweak the timer thread
+         to push things up.  */
+      /* Wait until timer_function has recorded its thread port.  */
+      while (timer_thread == 0)
+        swtch_pri (0);
+
+      if (timer_thread != mach_thread_self ())
+        {
+          thread_suspend (timer_thread);
+          thread_abort (timer_thread);
+          thread_resume (timer_thread);
+        }
+    }
+}
+
+/* Remove TIMER from the queue if it is linked (non-null `prevp').
+   Returns 1 if it was removed, 0 if it was not queued.  TIMER's `next'
+   and `prevp' are cleared on removal.  */
+int
+del_timer (struct timer_list *timer)
+{
+  if (timer->prevp)
+    {
+      *timer->prevp = timer->next;
+      if (timer->next)
+        timer->next->prevp = timer->prevp;
+
+      timer->next = 0;
+      timer->prevp = 0;
+      return 1;
+    }
+  else
+    return 0;
+}
+
+/* Zero every byte of *TIMER, leaving it unlinked.  */
+void
+init_timer (struct timer_list *timer)
+{
+  bzero (timer, sizeof (struct timer_list));
+}
+
+/* Set up timekeeping: map the time device into `mapped_time' (on
+   failure, report "cannot map time device" through error with status
+   2), set `root_jiffies' from the current time, and start a detached
+   thread running timer_function.  */
+void
+init_time ()
+{
+  error_t err;
+  struct timeval tp;
+
+  err = maptime_map (0, 0, &mapped_time);
+  if (err)
+    error (2, err, "cannot map time device");
+
+  maptime_read (mapped_time, &tp);
+
+  /* Seconds and microseconds both converted to units of 1/HZ s.  */
+  root_jiffies = (long long) tp.tv_sec * HZ
+    + ((long long) tp.tv_usec * HZ) / 1000000;
+
+  cthread_detach (cthread_fork ((cthread_fn_t) timer_function, 0));
+}
|