Diffstat (limited to 'pfinet/linux-src/net/ipv6')
-rw-r--r-- | pfinet/linux-src/net/ipv6/addrconf.c | 1948
-rw-r--r-- | pfinet/linux-src/net/ipv6/af_inet6.c | 610
-rw-r--r-- | pfinet/linux-src/net/ipv6/datagram_ipv6.c | 426
-rw-r--r-- | pfinet/linux-src/net/ipv6/exthdrs.c | 771
-rw-r--r-- | pfinet/linux-src/net/ipv6/icmpv6.c | 673
-rw-r--r-- | pfinet/linux-src/net/ipv6/ip6_fib.c | 1205
-rw-r--r-- | pfinet/linux-src/net/ipv6/ip6_flowlabel.c | 620
-rw-r--r-- | pfinet/linux-src/net/ipv6/ip6_input.c | 284
-rw-r--r-- | pfinet/linux-src/net/ipv6/ip6_output.c | 720
-rw-r--r-- | pfinet/linux-src/net/ipv6/ipv6_sockglue.c | 439
-rw-r--r-- | pfinet/linux-src/net/ipv6/mcast.c | 709
-rw-r--r-- | pfinet/linux-src/net/ipv6/ndisc.c | 1215
-rw-r--r-- | pfinet/linux-src/net/ipv6/protocol_ipv6.c | 117
-rw-r--r-- | pfinet/linux-src/net/ipv6/raw_ipv6.c | 644
-rw-r--r-- | pfinet/linux-src/net/ipv6/reassembly.c | 492
-rw-r--r-- | pfinet/linux-src/net/ipv6/route_ipv6.c | 1972
-rw-r--r-- | pfinet/linux-src/net/ipv6/tcp_ipv6.c | 1741
-rw-r--r-- | pfinet/linux-src/net/ipv6/udp_ipv6.c | 926
18 files changed, 15512 insertions, 0 deletions
diff --git a/pfinet/linux-src/net/ipv6/addrconf.c b/pfinet/linux-src/net/ipv6/addrconf.c
new file mode 100644
index 00000000..f8428bd8
--- /dev/null
+++ b/pfinet/linux-src/net/ipv6/addrconf.c
@@ -0,0 +1,1948 @@
+/*
+ *	IPv6 Address [auto]configuration
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *
+ *	$Id: addrconf.c,v 1.1 2007/10/08 21:12:30 stesie Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	Changes:
+ *
+ *	Janos Farkas			:	delete timer on ifdown
+ *	<chexum@bankinf.banki.hu>
+ *	Andi Kleen			:	kill double kfree on module
+ *						unload.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/delay.h>
+
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/rtnetlink.h>
+
+#include <asm/uaccess.h>
+
+/* Set to 3 to get tracing... */
+#define ACONF_DEBUG 2
+
+#if ACONF_DEBUG >= 3
+#define ADBG(x) printk x
+#else
+#define ADBG(x)
+#endif
+
+#ifdef CONFIG_SYSCTL
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+#endif
+
+/*
+ *	Configured unicast address list
+ */
+static struct inet6_ifaddr	*inet6_addr_lst[IN6_ADDR_HSIZE];
+
+/*
+ *	AF_INET6 device list
+ */
+static struct inet6_dev	*inet6_dev_lst[IN6_ADDR_HSIZE];
+
+static atomic_t		addr_list_lock = ATOMIC_INIT(0);
+
+void addrconf_verify(unsigned long);
+
+static struct timer_list addr_chk_timer = {
+	NULL, NULL,
+	0, 0, addrconf_verify
+};
+
+/*	These locks protect only against address deletions,
+	but not against address adds or status updates.
+
+	It is OK. The only race is when an address is selected,
+	which becomes invalid immediately after selection.
+	It is harmless, because this address could have become invalid
+	several usecs ago.
+
+	It's important that:
+
+	1. The result of inet6_add_addr() is used only inside lock
+	   or from bh_atomic context.
+
+	2. inet6_get_lladdr() is used only from bh protected context.
+
+	3. The result of ipv6_chk_addr() is not used outside of bh protected context.
+ */
+
+static __inline__ void addrconf_lock(void)
+{
+	atomic_inc(&addr_list_lock);
+	synchronize_bh();
+}
+
+static __inline__ void addrconf_unlock(void)
+{
+	atomic_dec(&addr_list_lock);
+}
+
+static int addrconf_ifdown(struct device *dev, int how);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_rs_timer(unsigned long data);
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+
+struct ipv6_devconf ipv6_devconf =
+{
+	0,				/* forwarding		*/
+	IPV6_DEFAULT_HOPLIMIT,		/* hop limit		*/
+	IPV6_MIN_MTU,			/* mtu			*/
+	1,				/* accept RAs		*/
+	1,				/* accept redirects	*/
+	1,				/* autoconfiguration	*/
+	1,				/* dad transmits	*/
+	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
+	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
+	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
+};
+
+static struct ipv6_devconf ipv6_devconf_dflt =
+{
+	0,				/* forwarding		*/
+	IPV6_DEFAULT_HOPLIMIT,		/* hop limit		*/
+	IPV6_MIN_MTU,			/* mtu			*/
+	1,				/* accept RAs		*/
+	1,				/* accept redirects	*/
+	1,				/* autoconfiguration	*/
+	1,				/* dad transmits	*/
+	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
+	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
+	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
+};
+
+int ipv6_addr_type(struct in6_addr *addr)
+{
+	u32 st;
+
+	st = addr->s6_addr32[0];
+
+	/* Consider all addresses with the first three bits different from
+	   000 and 111 as unicasts.
+	 */
+	if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
+	    (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
+		return IPV6_ADDR_UNICAST;
+
+	if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) {
+		int type = IPV6_ADDR_MULTICAST;
+
+		switch((st & __constant_htonl(0x00FF0000))) {
+		case __constant_htonl(0x00010000):
+			type |= IPV6_ADDR_LOOPBACK;
+			break;
+
+		case __constant_htonl(0x00020000):
+			type |= IPV6_ADDR_LINKLOCAL;
+			break;
+
+		case __constant_htonl(0x00050000):
+			type |= IPV6_ADDR_SITELOCAL;
+			break;
+		};
+		return type;
+	}
+
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
+		return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST);
+
+	if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
+		return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST);
+
+	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->in6_u.u6_addr32[3] == 0)
+				return IPV6_ADDR_ANY;
+
+			if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
+				return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST);
+
+			return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST);
+		}
+
+		if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff))
+			return IPV6_ADDR_MAPPED;
+	}
+
+	return IPV6_ADDR_RESERVED;
+}
+
+static struct inet6_dev * ipv6_add_dev(struct device *dev)
+{
+	struct inet6_dev *ndev, **bptr, *iter;
+	int hash;
+
+	if (dev->mtu < IPV6_MIN_MTU)
+		return NULL;
+
+	ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+
+	if (ndev) {
+		memset(ndev, 0, sizeof(struct inet6_dev));
+
+		ndev->dev = dev;
+		memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+		ndev->cnf.mtu6 = dev->mtu;
+		ndev->cnf.sysctl = NULL;
+		ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+		if (ndev->nd_parms == NULL) {
+			kfree(ndev);
+			return NULL;
+		}
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6");
+		addrconf_sysctl_register(ndev, &ndev->cnf);
+#endif
+		hash = ipv6_devindex_hash(dev->ifindex);
+		bptr = &inet6_dev_lst[hash];
+		iter = *bptr;
+
+		for (; iter; iter = iter->next)
+			bptr = &iter->next;
+
+		*bptr = ndev;
+
+	}
+	return ndev;
+}
+
+static struct inet6_dev * ipv6_find_idev(struct device *dev)
+{
+	struct inet6_dev *idev;
+
+	if ((idev = ipv6_get_idev(dev)) == NULL) {
+		idev = ipv6_add_dev(dev);
+		if (idev == NULL)
+			return NULL;
+		if (dev->flags&IFF_UP)
+			ipv6_mc_up(idev);
+	}
+	return idev;
+}
+
+static void addrconf_forward_change(struct inet6_dev *idev)
+{
+	int i;
+
+	if (idev)
+		return;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+		for (idev = inet6_dev_lst[i]; idev; idev = idev->next)
+			idev->cnf.forwarding = ipv6_devconf.forwarding;
+	}
+}
+
+struct inet6_dev * ipv6_get_idev(struct device *dev)
+{
+	struct inet6_dev *idev;
+	int hash;
+
+	hash = ipv6_devindex_hash(dev->ifindex);
+
+	for (idev = inet6_dev_lst[hash]; idev; idev = idev->next) {
+		if (idev->dev == dev)
+			return idev;
+	}
+	return NULL;
+}
+
+static struct inet6_ifaddr *
+ipv6_add_addr(struct inet6_dev *idev, struct in6_addr *addr, int scope)
+{
+	struct inet6_ifaddr *ifa;
+	int hash;
+
+	ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+
+	if (ifa == NULL) {
+		ADBG(("ipv6_add_addr: malloc failed\n"));
+		return NULL;
+	}
+
+	memset(ifa, 0, sizeof(struct inet6_ifaddr));
+	memcpy(&ifa->addr, addr, sizeof(struct in6_addr));
+
+	init_timer(&ifa->timer);
+	ifa->timer.data = (unsigned long) ifa;
+	ifa->scope = scope;
+	ifa->idev = idev;
+
+	/* Add to list. */
+	hash = ipv6_addr_hash(addr);
+
+	ifa->lst_next = inet6_addr_lst[hash];
+	inet6_addr_lst[hash] = ifa;
+
+	/* Add to inet6_dev unicast addr list. */
+	ifa->if_next = idev->addr_list;
+	idev->addr_list = ifa;
+
+	return ifa;
+}
+
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+	struct inet6_ifaddr *iter, **back;
+	int hash;
+
+	if (atomic_read(&addr_list_lock)) {
+		ifp->flags |= ADDR_INVALID;
+		ipv6_ifa_notify(RTM_DELADDR, ifp);
+		return;
+	}
+
+	hash = ipv6_addr_hash(&ifp->addr);
+
+	iter = inet6_addr_lst[hash];
+	back = &inet6_addr_lst[hash];
+
+	for (; iter; iter = iter->lst_next) {
+		if (iter == ifp) {
+			*back = ifp->lst_next;
+			synchronize_bh();
+
+			ifp->lst_next = NULL;
+			break;
+		}
+		back = &(iter->lst_next);
+	}
+
+	iter = ifp->idev->addr_list;
+	back = &ifp->idev->addr_list;
+
+	for (; iter; iter = iter->if_next) {
+		if (iter == ifp) {
+			*back = ifp->if_next;
+			synchronize_bh();
+
+			ifp->if_next = NULL;
+			break;
+		}
+		back = &(iter->if_next);
+	}
+
+	ipv6_ifa_notify(RTM_DELADDR, ifp);
+
+	kfree(ifp);
+}
+
+/*
+ *	Choose an appropriate source address
+ *	should do:
+ *	i)	get an address with an appropriate scope
+ *	ii)	see if there is a specific route for the destination and use
+ *		an address of the attached interface
+ *	iii)	don't use deprecated addresses
+ */
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	int scope;
+	struct inet6_ifaddr *ifp = NULL;
+	struct inet6_ifaddr *match = NULL;
+	struct device *dev = NULL;
+	struct rt6_info *rt;
+	int err;
+	int i;
+
+	rt = (struct rt6_info *) dst;
+	if (rt)
+		dev = rt->rt6i_dev;
+
+	addrconf_lock();
+
+	scope = ipv6_addr_scope(daddr);
+	if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
+		/*
+		 *	route for the "all destinations on link" rule
+		 *	when no routers are present
+		 */
+		scope = IFA_LINK;
+	}
+
+	/*
+	 *	known dev
+	 *	search dev and walk through dev addresses
+	 */
+
+	if (dev) {
+		struct inet6_dev *idev;
+		int hash;
+
+		if (dev->flags & IFF_LOOPBACK)
+			scope = IFA_HOST;
+
+		hash =
ipv6_devindex_hash(dev->ifindex); + for (idev = inet6_dev_lst[hash]; idev; idev=idev->next) { + if (idev->dev == dev) { + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == scope) { + if (!(ifp->flags & (ADDR_STATUS|DAD_STATUS))) + goto out; + + if (!(ifp->flags & (ADDR_INVALID|DAD_STATUS))) + match = ifp; + } + } + break; + } + } + } + + if (scope == IFA_LINK) + goto out; + + /* + * dev == NULL or search failed for specified dev + */ + + for (i=0; i < IN6_ADDR_HSIZE; i++) { + for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + if (ifp->scope == scope) { + if (!(ifp->flags & (ADDR_STATUS|DAD_STATUS))) + goto out; + + if (!(ifp->flags & (ADDR_INVALID|DAD_STATUS))) + match = ifp; + } + } + } + +out: + if (ifp == NULL) + ifp = match; + + err = -ENETUNREACH; + if (ifp) { + memcpy(saddr, &ifp->addr, sizeof(struct in6_addr)); + err = 0; + } + addrconf_unlock(); + return err; +} + +struct inet6_ifaddr * ipv6_get_lladdr(struct device *dev) +{ + struct inet6_ifaddr *ifp = NULL; + struct inet6_dev *idev; + + if ((idev = ipv6_get_idev(dev)) != NULL) { + addrconf_lock(); + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == IFA_LINK) + break; + } + addrconf_unlock(); + } + return ifp; +} + +/* + * Retrieve the ifaddr struct from an v6 address + * Called from ipv6_rcv to check if the address belongs + * to the host. + */ + +struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr, struct device *dev, int nd) +{ + struct inet6_ifaddr * ifp; + u8 hash; + unsigned flags = 0; + + if (!nd) + flags |= DAD_STATUS|ADDR_INVALID; + + addrconf_lock(); + + hash = ipv6_addr_hash(addr); + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && !(ifp->flags&flags)) { + if (dev == NULL || ifp->idev->dev == dev || + !(ifp->scope&(IFA_LINK|IFA_HOST))) + break; + } + } + + addrconf_unlock(); + return ifp; +} + +void addrconf_dad_failure(struct inet6_ifaddr *ifp) +{ + printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); + del_timer(&ifp->timer); + ipv6_del_addr(ifp); +} + + +/* Join to solicited addr multicast group. 
*/ + +static void addrconf_join_solict(struct device *dev, struct in6_addr *addr) +{ + struct in6_addr maddr; + + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + return; + +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); +#endif +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); +#endif +} + +static void addrconf_leave_solict(struct device *dev, struct in6_addr *addr) +{ + struct in6_addr maddr; + + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + return; + +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(addr, &maddr); + ipv6_dev_mc_dec(dev, &maddr); +#endif +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(addr, &maddr); + ipv6_dev_mc_dec(dev, &maddr); +#endif +} + + +#ifdef CONFIG_IPV6_EUI64 +static int ipv6_generate_eui64(u8 *eui, struct device *dev) +{ + switch (dev->type) { + case ARPHRD_ETHER: + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr+3, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + return 0; + } + return -1; +} + +static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) +{ + int err = -1; + struct inet6_ifaddr *ifp; + + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == IFA_LINK && !(ifp->flags&(ADDR_STATUS|DAD_STATUS))) { + memcpy(eui, ifp->addr.s6_addr+8, 8); + err = 0; + break; + } + } + return err; +} +#endif + +/* + * Add prefix route. + */ + +static void +addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev, + unsigned long expires, unsigned flags) +{ + struct in6_rtmsg rtmsg; + + memset(&rtmsg, 0, sizeof(rtmsg)); + memcpy(&rtmsg.rtmsg_dst, pfx, sizeof(struct in6_addr)); + rtmsg.rtmsg_dst_len = plen; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_info = expires; + rtmsg.rtmsg_flags = RTF_UP|flags; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + + /* Prevent useless cloning on PtP SIT. + This thing is done here expecting that the whole + class of non-broadcast devices need not cloning. 
+ */ + if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) + rtmsg.rtmsg_flags |= RTF_NONEXTHOP; + + ip6_route_add(&rtmsg); +} + +/* Create "default" multicast route to the interface */ + +static void addrconf_add_mroute(struct device *dev) +{ + struct in6_rtmsg rtmsg; + + memset(&rtmsg, 0, sizeof(rtmsg)); + ipv6_addr_set(&rtmsg.rtmsg_dst, + __constant_htonl(0xFF000000), 0, 0, 0); + rtmsg.rtmsg_dst_len = 8; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ip6_route_add(&rtmsg); +} + +static void sit_route_add(struct device *dev) +{ + struct in6_rtmsg rtmsg; + + memset(&rtmsg, 0, sizeof(rtmsg)); + + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + + /* prefix length - 96 bytes "::d.d.d.d" */ + rtmsg.rtmsg_dst_len = 96; + rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; + rtmsg.rtmsg_ifindex = dev->ifindex; + + ip6_route_add(&rtmsg); +} + +static void addrconf_add_lroute(struct device *dev) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, __constant_htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 10, dev, 0, RTF_ADDRCONF); +} + +static struct inet6_dev *addrconf_add_dev(struct device *dev) +{ + struct inet6_dev *idev; + + if ((idev = ipv6_find_idev(dev)) == NULL) + return NULL; + + /* Add default multicast route */ + addrconf_add_mroute(dev); + + /* Add link local route */ + addrconf_add_lroute(dev); + return idev; +} + +void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) +{ + struct prefix_info *pinfo; + struct rt6_info *rt; + __u32 valid_lft; + __u32 prefered_lft; + int addr_type; + unsigned long rt_expires; + struct inet6_dev *in6_dev = ipv6_get_idev(dev); + + if (in6_dev == NULL) { + printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); + return; + } + + pinfo = (struct prefix_info *) opt; + + if (len < sizeof(struct prefix_info)) { + ADBG(("addrconf: prefix option too short\n")); + return; + } + + /* + * Validation checks ([ADDRCONF], page 19) + */ + + addr_type = ipv6_addr_type(&pinfo->prefix); + + if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) + return; + + valid_lft = ntohl(pinfo->valid); + prefered_lft = ntohl(pinfo->prefered); + + if (prefered_lft > valid_lft) { + printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n"); + return; + } + + /* + * Two things going on here: + * 1) Add routes for on-link prefixes + * 2) Configure prefixes with the auto flag set + */ + + /* Avoid arithemtic overflow. Really, we could + save rt_expires in seconds, likely valid_lft, + but it would require division in fib gc, that it + not good. 
+ */ + if (valid_lft >= 0x7FFFFFFF/HZ) + rt_expires = 0; + else + rt_expires = jiffies + valid_lft * HZ; + + rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); + + if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (rt->rt6i_flags&RTF_EXPIRES) { + if (pinfo->onlink == 0 || valid_lft == 0) { + ip6_del_rt(rt); + } else { + rt->rt6i_expires = rt_expires; + } + } + } else if (pinfo->onlink && valid_lft) { + addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, + dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES); + } + if (rt) + dst_release(&rt->u.dst); + + /* Try to figure out our local address for this prefix */ + + if (pinfo->autoconf && in6_dev->cnf.autoconf) { + struct inet6_ifaddr * ifp; + struct in6_addr addr; + int plen; + + plen = pinfo->prefix_len >> 3; + +#ifdef CONFIG_IPV6_EUI64 + if (pinfo->prefix_len == 64) { + memcpy(&addr, &pinfo->prefix, 8); + if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) + return; + goto ok; + } +#endif +#ifndef CONFIG_IPV6_NO_PB + if (pinfo->prefix_len == ((sizeof(struct in6_addr) - dev->addr_len)<<3)) { + memcpy(&addr, &pinfo->prefix, plen); + memcpy(addr.s6_addr + plen, dev->dev_addr, + dev->addr_len); + goto ok; + } +#endif + printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", pinfo->prefix_len); + return; + +ok: + ifp = ipv6_chk_addr(&addr, dev, 1); + + if ((ifp == NULL || (ifp->flags&ADDR_INVALID)) && valid_lft) { + + if (ifp == NULL) + ifp = ipv6_add_addr(in6_dev, &addr, addr_type & IPV6_ADDR_SCOPE_MASK); + + if (ifp == NULL) + return; + + ifp->prefix_len = pinfo->prefix_len; + + addrconf_dad_start(ifp); + } + + if (ifp && valid_lft == 0) { + ipv6_del_addr(ifp); + ifp = NULL; + } + + if (ifp) { + int event = 0; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + if (ifp->flags & ADDR_INVALID) + event = RTM_NEWADDR; + ifp->flags &= ~(ADDR_DEPRECATED|ADDR_INVALID); + ipv6_ifa_notify(event, ifp); + } + } +} + +/* + * Set destination address. + * Special case for SIT interfaces where we create a new "virtual" + * device. 
+ */ +int addrconf_set_dstaddr(void *arg) +{ + struct in6_ifreq ireq; + struct device *dev; + int err = -EINVAL; + + rtnl_lock(); + + err = -EFAULT; + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + goto err_exit; + + dev = dev_get_by_index(ireq.ifr6_ifindex); + + err = -ENODEV; + if (dev == NULL) + goto err_exit; + + if (dev->type == ARPHRD_SIT) { + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + + err = -EADDRNOTAVAIL; + if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) + goto err_exit; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; + p.iph.saddr = 0; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + ifr.ifr_ifru.ifru_data = (void*)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + + if (err == 0) { + err = -ENOBUFS; + if ((dev = dev_get(p.name)) == NULL) + goto err_exit; + err = dev_open(dev); + } + } + +err_exit: + rtnl_unlock(); + return err; +} + +/* + * Manual configuration of address on an interface + */ +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +{ + struct inet6_ifaddr *ifp; + struct inet6_dev *idev; + struct device *dev; + int scope; + + if ((dev = dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if ((idev = addrconf_add_dev(dev)) == NULL) + return -ENOBUFS; + + scope = ipv6_addr_scope(pfx); + + addrconf_lock(); + if ((ifp = ipv6_add_addr(idev, pfx, scope)) != NULL) { + ifp->prefix_len = plen; + ifp->flags |= ADDR_PERMANENT; + addrconf_dad_start(ifp); + addrconf_unlock(); + return 0; + } + addrconf_unlock(); + + return -ENOBUFS; +} + +static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) +{ + struct inet6_ifaddr *ifp; + struct inet6_dev *idev; + struct device *dev; + + if ((dev = dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if ((idev = ipv6_get_idev(dev)) == NULL) + return -ENXIO; + + start_bh_atomic(); + for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->prefix_len == plen && + (!memcmp(pfx, &ifp->addr, sizeof(struct in6_addr)))) { + ipv6_del_addr(ifp); + end_bh_atomic(); + + /* If the last address is deleted administratively, + disable IPv6 on this interface. 
+ */ + if (idev->addr_list == NULL) + addrconf_ifdown(idev->dev, 1); + return 0; + } + } + end_bh_atomic(); + return -EADDRNOTAVAIL; +} + + +int addrconf_add_ifaddr(void *arg) +{ + struct in6_ifreq ireq; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + return -EFAULT; + + rtnl_lock(); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + rtnl_unlock(); + return err; +} + +int addrconf_del_ifaddr(void *arg) +{ + struct in6_ifreq ireq; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + return -EFAULT; + + rtnl_lock(); + err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + rtnl_unlock(); + return err; +} + +static void sit_add_v4_addrs(struct inet6_dev *idev) +{ + struct inet6_ifaddr * ifp; + struct in6_addr addr; + struct device *dev; + int scope; + + memset(&addr, 0, sizeof(struct in6_addr)); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); + + if (idev->dev->flags&IFF_POINTOPOINT) { + addr.s6_addr32[0] = __constant_htonl(0xfe800000); + scope = IFA_LINK; + } else { + scope = IPV6_ADDR_COMPATv4; + } + + if (addr.s6_addr32[3]) { + addrconf_lock(); + ifp = ipv6_add_addr(idev, &addr, scope); + if (ifp) { + ifp->flags |= ADDR_PERMANENT; + ifp->prefix_len = 128; + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + addrconf_unlock(); + return; + } + + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (dev->ip_ptr && (dev->flags & IFF_UP)) { + struct in_device * in_dev = dev->ip_ptr; + struct in_ifaddr * ifa; + + int flag = scope; + + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + addr.s6_addr32[3] = ifa->ifa_local; + + if (ifa->ifa_scope == RT_SCOPE_LINK) + continue; + if (ifa->ifa_scope >= RT_SCOPE_HOST) { + if (idev->dev->flags&IFF_POINTOPOINT) + continue; + flag |= IFA_HOST; + } + + addrconf_lock(); + ifp = ipv6_add_addr(idev, &addr, flag); + if (ifp) { + if (idev->dev->flags&IFF_POINTOPOINT) + ifp->prefix_len = 10; + else + ifp->prefix_len = 96; + ifp->flags |= ADDR_PERMANENT; + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + addrconf_unlock(); + } + } + } +} + +static void init_loopback(struct device *dev) +{ + struct in6_addr addr; + struct inet6_dev *idev; + struct inet6_ifaddr * ifp; + + /* ::1 */ + + memset(&addr, 0, sizeof(struct in6_addr)); + addr.s6_addr[15] = 1; + + if ((idev = ipv6_find_idev(dev)) == NULL) { + printk(KERN_DEBUG "init loopback: add_dev failed\n"); + return; + } + + addrconf_lock(); + ifp = ipv6_add_addr(idev, &addr, IFA_HOST); + + if (ifp) { + ifp->flags |= ADDR_PERMANENT; + ifp->prefix_len = 128; + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + addrconf_unlock(); +} + +static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) +{ + struct inet6_ifaddr * ifp; + + addrconf_lock(); + ifp = ipv6_add_addr(idev, addr, IFA_LINK); + if (ifp) { + ifp->flags = ADDR_PERMANENT; + ifp->prefix_len = 10; + addrconf_dad_start(ifp); + } + addrconf_unlock(); +} + +static void addrconf_dev_config(struct device *dev) +{ + struct in6_addr addr; + struct inet6_dev * idev; + + if (dev->type != ARPHRD_ETHER) { + /* Alas, we support only Ethernet autoconfiguration. 
*/ + return; + } + + idev = addrconf_add_dev(dev); + if (idev == NULL) + return; + +#ifdef CONFIG_IPV6_EUI64 + memset(&addr, 0, sizeof(struct in6_addr)); + + addr.s6_addr[0] = 0xFE; + addr.s6_addr[1] = 0x80; + + if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) + addrconf_add_linklocal(idev, &addr); +#endif + +#ifndef CONFIG_IPV6_NO_PB + memset(&addr, 0, sizeof(struct in6_addr)); + + addr.s6_addr[0] = 0xFE; + addr.s6_addr[1] = 0x80; + + memcpy(addr.s6_addr + (sizeof(struct in6_addr) - dev->addr_len), + dev->dev_addr, dev->addr_len); + addrconf_add_linklocal(idev, &addr); +#endif +} + +static void addrconf_sit_config(struct device *dev) +{ + struct inet6_dev *idev; + + /* + * Configure the tunnel with one of our IPv4 + * addresses... we should configure all of + * our v4 addrs in the tunnel + */ + + if ((idev = ipv6_find_idev(dev)) == NULL) { + printk(KERN_DEBUG "init sit: add_dev failed\n"); + return; + } + + sit_add_v4_addrs(idev); + + if (dev->flags&IFF_POINTOPOINT) { + addrconf_add_mroute(dev); + addrconf_add_lroute(dev); + } else + sit_route_add(dev); +} + + +int addrconf_notify(struct notifier_block *this, unsigned long event, + void * data) +{ + struct device *dev; + + dev = (struct device *) data; + + switch(event) { + case NETDEV_UP: + switch(dev->type) { + case ARPHRD_SIT: + addrconf_sit_config(dev); + break; + + case ARPHRD_LOOPBACK: + init_loopback(dev); + break; + + default: + addrconf_dev_config(dev); + break; + }; + +#ifdef CONFIG_IPV6_NETLINK + rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0); +#endif + break; + + case NETDEV_CHANGEMTU: + if (dev->mtu >= IPV6_MIN_MTU) { + struct inet6_dev *idev; + + if ((idev = ipv6_get_idev(dev)) == NULL) + break; + idev->cnf.mtu6 = dev->mtu; + rt6_mtu_change(dev, dev->mtu); + break; + } + + /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ + + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + /* + * Remove all addresses from this interface. 
+ */ + if (addrconf_ifdown(dev, event != NETDEV_DOWN) == 0) { +#ifdef CONFIG_IPV6_NETLINK + rt6_sndmsg(RTMSG_DELDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0); +#endif + } + + break; + case NETDEV_CHANGE: + break; + }; + + return NOTIFY_OK; +} + +static int addrconf_ifdown(struct device *dev, int how) +{ + struct inet6_dev *idev, **bidev; + struct inet6_ifaddr *ifa, **bifa; + int i, hash; + + rt6_ifdown(dev); + neigh_ifdown(&nd_tbl, dev); + + idev = ipv6_get_idev(dev); + if (idev == NULL) + return -ENODEV; + + start_bh_atomic(); + + /* Discard address list */ + + idev->addr_list = NULL; + + /* + * Clean addresses hash table + */ + + for (i=0; i<16; i++) { + bifa = &inet6_addr_lst[i]; + + while ((ifa = *bifa) != NULL) { + if (ifa->idev == idev) { + *bifa = ifa->lst_next; + del_timer(&ifa->timer); + ipv6_ifa_notify(RTM_DELADDR, ifa); + kfree(ifa); + continue; + } + bifa = &ifa->lst_next; + } + } + + /* Discard multicast list */ + + if (how == 1) + ipv6_mc_destroy_dev(idev); + else + ipv6_mc_down(idev); + + /* Delete device from device hash table (if unregistered) */ + + if (how == 1) { + hash = ipv6_devindex_hash(dev->ifindex); + + for (bidev = &inet6_dev_lst[hash]; (idev=*bidev) != NULL; bidev = &idev->next) { + if (idev->dev == dev) { + *bidev = idev->next; + neigh_parms_release(&nd_tbl, idev->nd_parms); +#ifdef CONFIG_SYSCTL + addrconf_sysctl_unregister(&idev->cnf); +#endif + kfree(idev); + break; + } + } + } + end_bh_atomic(); + return 0; +} + + +static void addrconf_rs_timer(unsigned long data) +{ + struct inet6_ifaddr *ifp; + + ifp = (struct inet6_ifaddr *) data; + + if (ifp->idev->cnf.forwarding) + return; + + if (ifp->idev->if_flags & IF_RA_RCVD) { + /* + * Announcement received after solicitation + * was sent + */ + return; + } + + if (ifp->probes++ <= ifp->idev->cnf.rtr_solicits) { + struct in6_addr all_routers; + + ipv6_addr_all_routers(&all_routers); + + ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); + + ifp->timer.function = addrconf_rs_timer; + ifp->timer.expires = (jiffies + + ifp->idev->cnf.rtr_solicit_interval); + add_timer(&ifp->timer); + } else { + struct in6_rtmsg rtmsg; + + printk(KERN_DEBUG "%s: no IPv6 routers present\n", + ifp->idev->dev->name); + + memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_flags = (RTF_ALLONLINK | RTF_ADDRCONF | + RTF_DEFAULT | RTF_UP); + + rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex; + + ip6_route_add(&rtmsg); + } +} + +/* + * Duplicate Address Detection + */ +static void addrconf_dad_start(struct inet6_ifaddr *ifp) +{ + struct device *dev; + unsigned long rand_num; + + dev = ifp->idev->dev; + + addrconf_join_solict(dev, &ifp->addr); + + if (ifp->prefix_len != 128 && (ifp->flags&ADDR_PERMANENT)) + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0, RTF_ADDRCONF); + + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { + start_bh_atomic(); + ifp->flags &= ~DAD_INCOMPLETE; + addrconf_dad_completed(ifp); + end_bh_atomic(); + return; + } + + net_srandom(ifp->addr.s6_addr32[3]); + + ifp->probes = ifp->idev->cnf.dad_transmits; + ifp->flags |= DAD_INCOMPLETE; + + rand_num = net_random() % ifp->idev->cnf.rtr_solicit_delay; + + ifp->timer.function = addrconf_dad_timer; + ifp->timer.expires = jiffies + rand_num; + + add_timer(&ifp->timer); +} + +static void addrconf_dad_timer(unsigned long data) +{ + struct inet6_ifaddr *ifp; + struct in6_addr unspec; + struct in6_addr mcaddr; + + ifp = (struct inet6_ifaddr *) data; + + if (ifp->probes == 0) { + /* + * DAD 
was successful + */ + + ifp->flags &= ~DAD_INCOMPLETE; + addrconf_dad_completed(ifp); + return; + } + + ifp->probes--; + + /* send a neighbour solicitation for our addr */ + memset(&unspec, 0, sizeof(unspec)); +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(&ifp->addr, &mcaddr); + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); +#endif +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(&ifp->addr, &mcaddr); + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); +#endif + + ifp->timer.expires = jiffies + ifp->idev->cnf.rtr_solicit_interval; + add_timer(&ifp->timer); +} + +static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +{ + struct device * dev = ifp->idev->dev; + + /* + * Configure the address for reception. Now it is valid. + */ + + ipv6_ifa_notify(RTM_NEWADDR, ifp); + + /* If added prefix is link local and forwarding is off, + start sending router solicitations. + */ + + if (ifp->idev->cnf.forwarding == 0 && + (dev->flags&IFF_LOOPBACK) == 0 && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + struct in6_addr all_routers; + + ipv6_addr_all_routers(&all_routers); + + /* + * If a host as already performed a random delay + * [...] as part of DAD [...] there is no need + * to delay again before sending the first RS + */ + ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); + + ifp->probes = 1; + ifp->timer.function = addrconf_rs_timer; + ifp->timer.expires = (jiffies + + ifp->idev->cnf.rtr_solicit_interval); + ifp->idev->if_flags |= IF_RS_SENT; + add_timer(&ifp->timer); + } +} + +#ifdef CONFIG_PROC_FS +static int iface_proc_info(char *buffer, char **start, off_t offset, + int length, int dummy) +{ + struct inet6_ifaddr *ifp; + int i; + int len = 0; + off_t pos=0; + off_t begin=0; + + addrconf_lock(); + + for (i=0; i < IN6_ADDR_HSIZE; i++) { + for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + int j; + + for (j=0; j<16; j++) { + sprintf(buffer + len, "%02x", + ifp->addr.s6_addr[j]); + len += 2; + } + + len += sprintf(buffer + len, + " %02x %02x %02x %02x %8s\n", + ifp->idev->dev->ifindex, + ifp->prefix_len, + ifp->scope, + ifp->flags, + ifp->idev->dev->name); + pos=begin+len; + if(pos<offset) { + len=0; + begin=pos; + } + if(pos>offset+length) + goto done; + } + } + +done: + addrconf_unlock(); + + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if(len<0) + len=0; + return len; +} + +struct proc_dir_entry iface_proc_entry = +{ + 0, 8, "if_inet6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, NULL, + &iface_proc_info +}; +#endif /* CONFIG_PROC_FS */ + +/* + * Periodic address status verification + */ + +void addrconf_verify(unsigned long foo) +{ + struct inet6_ifaddr *ifp; + unsigned long now = jiffies; + int i; + + if (atomic_read(&addr_list_lock)) { + addr_chk_timer.expires = jiffies + 1*HZ; + add_timer(&addr_chk_timer); + return; + } + + for (i=0; i < IN6_ADDR_HSIZE; i++) { + for (ifp=inet6_addr_lst[i]; ifp;) { + if (ifp->flags & ADDR_INVALID) { + struct inet6_ifaddr *bp = ifp; + ifp= ifp->lst_next; + ipv6_del_addr(bp); + continue; + } + if (!(ifp->flags & ADDR_PERMANENT)) { + struct inet6_ifaddr *bp; + unsigned long age; + + age = (now - ifp->tstamp) / HZ; + + bp = ifp; + ifp= ifp->lst_next; + + if (age > bp->valid_lft) + ipv6_del_addr(bp); + else if (age > bp->prefered_lft) { + bp->flags |= ADDR_DEPRECATED; + ipv6_ifa_notify(0, bp); + } + + continue; + } + ifp = ifp->lst_next; + } + } + + addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; + add_timer(&addr_chk_timer); +} + 
+#ifdef CONFIG_RTNETLINK + +static int +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *pfx; + + pfx = NULL; + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (pfx == NULL) + return -EINVAL; + + return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); +} + +static int +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *pfx; + + pfx = NULL; + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (pfx == NULL) + return -EINVAL; + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); +} + +static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = ifa->prefix_len; + ifm->ifa_flags = ifa->flags & ~ADDR_INVALID; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ifa->scope&IFA_HOST) + ifm->ifa_scope = RT_SCOPE_HOST; + else if (ifa->scope&IFA_LINK) + ifm->ifa_scope = RT_SCOPE_LINK; + else if (ifa->scope&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifa->idev->dev->ifindex; + RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); + if (!(ifa->flags&IFA_F_PERMANENT)) { + ci.ifa_prefered = ifa->prefered_lft; + ci.ifa_valid = ifa->valid_lft; + if (ci.ifa_prefered != 0xFFFFFFFF) { + long tval = (jiffies - ifa->tstamp)/HZ; + ci.ifa_prefered -= tval; + if (ci.ifa_valid != 0xFFFFFFFF) + ci.ifa_valid -= tval; + } + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + } + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx, ip_idx; + int s_idx, s_ip_idx; + struct inet6_ifaddr *ifa; + + s_idx = cb->args[0]; + s_ip_idx = ip_idx = cb->args[1]; + + for (idx=0; idx < IN6_ADDR_HSIZE; idx++) { + if (idx < s_idx) + continue; + if (idx > s_idx) + s_ip_idx = 0; + start_bh_atomic(); + for (ifa=inet6_addr_lst[idx], ip_idx = 0; ifa; + ifa = ifa->lst_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if (inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) { + end_bh_atomic(); + goto done; + } + } + end_bh_atomic(); + } +done: + cb->args[0] = idx; + cb->args[1] = ip_idx; + + return skb->len; +} + +static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) +{ + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); + return; + } + if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, 
EINVAL); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); +} + +static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +{ + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + + { inet6_rtm_newaddr, NULL, }, + { inet6_rtm_deladdr, NULL, }, + { NULL, inet6_dump_ifaddr, }, + { NULL, NULL, }, + + { inet6_rtm_newroute, NULL, }, + { inet6_rtm_delroute, NULL, }, + { inet6_rtm_getroute, inet6_dump_fib, }, + { NULL, NULL, }, +}; +#endif + +static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +{ +#ifdef CONFIG_RTNETLINK + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); +#endif + switch (event) { + case RTM_NEWADDR: + ip6_rt_addr_add(&ifp->addr, ifp->idev->dev); + break; + case RTM_DELADDR: + start_bh_atomic(); + addrconf_leave_solict(ifp->idev->dev, &ifp->addr); + if (ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 0) == NULL) + ip6_rt_addr_del(&ifp->addr, ifp->idev->dev); + end_bh_atomic(); + break; + } +} + +#ifdef CONFIG_SYSCTL + +static +int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + int *valp = ctl->data; + int val = *valp; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp); + + if (write && *valp != val && valp != &ipv6_devconf_dflt.forwarding) { + struct inet6_dev *idev = NULL; + + if (valp != &ipv6_devconf.forwarding) { + struct device *dev = dev_get_by_index(ctl->ctl_name); + if (dev) + idev = ipv6_get_idev(dev); + if (idev == NULL) + return ret; + } else + ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; + + addrconf_forward_change(idev); + + if (*valp) { + start_bh_atomic(); + rt6_purge_dflt_routers(0); + end_bh_atomic(); + } + } + + return ret; +} + +static struct addrconf_sysctl_table +{ + struct ctl_table_header *sysctl_header; + ctl_table addrconf_vars[11]; + ctl_table addrconf_dev[2]; + ctl_table addrconf_conf_dir[2]; + ctl_table addrconf_proto_dir[2]; + ctl_table addrconf_root_dir[2]; +} addrconf_sysctl = { + NULL, + {{NET_IPV6_FORWARDING, "forwarding", + &ipv6_devconf.forwarding, sizeof(int), 0644, NULL, + &addrconf_sysctl_forward}, + + {NET_IPV6_HOP_LIMIT, "hop_limit", + &ipv6_devconf.hop_limit, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_MTU, "mtu", + &ipv6_devconf.mtu6, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_ACCEPT_RA, "accept_ra", + &ipv6_devconf.accept_ra, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects", + &ipv6_devconf.accept_redirects, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_AUTOCONF, "autoconf", + &ipv6_devconf.autoconf, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_DAD_TRANSMITS, "dad_transmits", + &ipv6_devconf.dad_transmits, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_RTR_SOLICITS, "router_solicitations", + &ipv6_devconf.rtr_solicits, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval", + &ipv6_devconf.rtr_solicit_interval, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + + {NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay", + &ipv6_devconf.rtr_solicit_delay, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + + {0}}, + + {{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, addrconf_sysctl.addrconf_vars},{0}}, + {{NET_IPV6_CONF, "conf", NULL, 0, 0555, addrconf_sysctl.addrconf_dev},{0}}, + {{NET_IPV6, "ipv6", NULL, 0, 0555, addrconf_sysctl.addrconf_conf_dir},{0}}, + {{CTL_NET, 
"net", NULL, 0, 0555, addrconf_sysctl.addrconf_proto_dir},{0}} +}; + +static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) +{ + int i; + struct device *dev = idev ? idev->dev : NULL; + struct addrconf_sysctl_table *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t == NULL) + return; + memcpy(t, &addrconf_sysctl, sizeof(*t)); + for (i=0; i<sizeof(t->addrconf_vars)/sizeof(t->addrconf_vars[0])-1; i++) { + t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; + t->addrconf_vars[i].de = NULL; + } + if (dev) { + t->addrconf_dev[0].procname = dev->name; + t->addrconf_dev[0].ctl_name = dev->ifindex; + } else { + t->addrconf_dev[0].procname = "default"; + t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; + } + t->addrconf_dev[0].child = t->addrconf_vars; + t->addrconf_dev[0].de = NULL; + t->addrconf_conf_dir[0].child = t->addrconf_dev; + t->addrconf_conf_dir[0].de = NULL; + t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; + t->addrconf_proto_dir[0].de = NULL; + t->addrconf_root_dir[0].child = t->addrconf_proto_dir; + t->addrconf_root_dir[0].de = NULL; + + t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0); + if (t->sysctl_header == NULL) + kfree(t); + else + p->sysctl = t; +} + +static void addrconf_sysctl_unregister(struct ipv6_devconf *p) +{ + if (p->sysctl) { + struct addrconf_sysctl_table *t = p->sysctl; + p->sysctl = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t); + } +} + + +#endif + +/* + * Init / cleanup code + */ + +__initfunc(void addrconf_init(void)) +{ +#ifdef MODULE + struct device *dev; + + /* This takes sense only during module load. */ + + for (dev = dev_base; dev; dev = dev->next) { + if (!(dev->flags&IFF_UP)) + continue; + + switch (dev->type) { + case ARPHRD_LOOPBACK: + init_loopback(dev); + break; + case ARPHRD_ETHER: + addrconf_dev_config(dev); + break; + default: + /* Ignore all other */ + } + } +#endif + +#ifdef CONFIG_PROC_FS + proc_net_register(&iface_proc_entry); +#endif + + addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; + add_timer(&addr_chk_timer); +#ifdef CONFIG_RTNETLINK + rtnetlink_links[PF_INET6] = inet6_rtnetlink_table; +#endif +#ifdef CONFIG_SYSCTL + addrconf_sysctl.sysctl_header = + register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0); + addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); +#endif +} + +#ifdef MODULE +void addrconf_cleanup(void) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + int i; + +#ifdef CONFIG_RTNETLINK + rtnetlink_links[PF_INET6] = NULL; +#endif +#ifdef CONFIG_SYSCTL + addrconf_sysctl_unregister(&ipv6_devconf_dflt); + addrconf_sysctl_unregister(&ipv6_devconf); +#endif + + del_timer(&addr_chk_timer); + + /* + * clean dev list. + */ + + for (i=0; i < IN6_ADDR_HSIZE; i++) { + struct inet6_dev *next; + for (idev = inet6_dev_lst[i]; idev; idev = next) { + next = idev->next; + addrconf_ifdown(idev->dev, 1); + } + } + + start_bh_atomic(); + /* + * clean addr_list + */ + + for (i=0; i < IN6_ADDR_HSIZE; i++) { + for (ifa=inet6_addr_lst[i]; ifa; ) { + struct inet6_ifaddr *bifa; + + bifa = ifa; + ifa = ifa->lst_next; + printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); + /* Do not free it; something is wrong. + Now we can investigate it with debugger. 
+ */ + } + } + end_bh_atomic(); + +#ifdef CONFIG_PROC_FS + proc_net_unregister(iface_proc_entry.low_ino); +#endif +} +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv6/af_inet6.c b/pfinet/linux-src/net/ipv6/af_inet6.c new file mode 100644 index 00000000..4ad6403c --- /dev/null +++ b/pfinet/linux-src/net/ipv6/af_inet6.c @@ -0,0 +1,610 @@ +/* + * PF_INET6 socket protocol family + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * Adapted from linux/net/ipv4/af_inet.c + * + * $Id: af_inet6.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/module.h> +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/version.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/icmpv6.h> + +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/ipip.h> +#include <net/protocol.h> +#include <net/inet_common.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#ifdef MODULE +static int unloadable = 0; /* XX: Turn to one when all is ok within the + module for allowing unload */ +#endif + +#if defined(MODULE) && LINUX_VERSION_CODE > 0x20115 +MODULE_AUTHOR("Cast of dozens"); +MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); +MODULE_PARM(unloadable, "i"); +#endif + +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +/* IPv6 procfs goodies... 
*/ + +#ifdef CONFIG_PROC_FS +extern int raw6_get_info(char *, char **, off_t, int, int); +extern int tcp6_get_info(char *, char **, off_t, int, int); +extern int udp6_get_info(char *, char **, off_t, int, int); +extern int afinet6_get_info(char *, char **, off_t, int, int); +extern int afinet6_get_snmp(char *, char **, off_t, int, int); +#endif + +#ifdef CONFIG_SYSCTL +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + +static int inet6_create(struct socket *sock, int protocol) +{ + struct sock *sk; + struct proto *prot; + + sk = sk_alloc(PF_INET6, GFP_KERNEL, 1); + if (sk == NULL) + goto do_oom; + + if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) { + if (protocol && protocol != IPPROTO_TCP) + goto free_and_noproto; + protocol = IPPROTO_TCP; + prot = &tcpv6_prot; + sock->ops = &inet6_stream_ops; + } else if(sock->type == SOCK_DGRAM) { + if (protocol && protocol != IPPROTO_UDP) + goto free_and_noproto; + protocol = IPPROTO_UDP; + sk->no_check = UDP_NO_CHECK; + prot=&udpv6_prot; + sock->ops = &inet6_dgram_ops; + } else if(sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + goto free_and_badperm; + if (!protocol) + goto free_and_noproto; + prot = &rawv6_prot; + sock->ops = &inet6_dgram_ops; + sk->reuse = 1; + sk->num = protocol; + } else { + goto free_and_badtype; + } + + sock_init_data(sock, sk); + + sk->destruct = NULL; + sk->zapped = 0; + sk->family = PF_INET6; + sk->protocol = protocol; + + sk->prot = prot; + sk->backlog_rcv = prot->backlog_rcv; + + sk->timer.data = (unsigned long)sk; + sk->timer.function = &net_timer; + + sk->net_pinfo.af_inet6.hop_limit = -1; + sk->net_pinfo.af_inet6.mcast_hops = -1; + sk->net_pinfo.af_inet6.mc_loop = 1; + sk->net_pinfo.af_inet6.pmtudisc = IPV6_PMTUDISC_WANT; + + /* Init the ipv4 part of the socket since we can have sockets + * using v6 API for ipv4. + */ + sk->ip_ttl = 64; + + sk->ip_mc_loop = 1; + sk->ip_mc_ttl = 1; + sk->ip_mc_index = 0; + sk->ip_mc_list = NULL; + + if (sk->type==SOCK_RAW && protocol==IPPROTO_RAW) + sk->ip_hdrincl=1; + + if (sk->num) { + /* It assumes that any protocol which allows + * the user to assign a number at socket + * creation time automatically shares. + */ + sk->sport = ntohs(sk->num); + sk->prot->hash(sk); + add_to_prot_sklist(sk); + } + + if (sk->prot->init) { + int err = sk->prot->init(sk); + if (err != 0) { + destroy_sock(sk); + return(err); + } + } + MOD_INC_USE_COUNT; + return(0); + +free_and_badtype: + sk_free(sk); + return -ESOCKTNOSUPPORT; +free_and_badperm: + sk_free(sk); + return -EPERM; +free_and_noproto: + sk_free(sk); + return -EPROTONOSUPPORT; +do_oom: + return -ENOBUFS; +} + + +/* bind for INET6 API */ +static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr; + struct sock *sk = sock->sk; + __u32 v4addr = 0; + unsigned short snum; + int addr_type = 0; + + /* If the socket has its own bind function then use it. */ + if(sk->prot->bind) + return sk->prot->bind(sk, uaddr, addr_len); + + /* Check these errors (active socket, bad address length, double bind). */ + if ((sk->state != TCP_CLOSE) || + (addr_len < sizeof(struct sockaddr_in6)) || + (sk->num != 0)) + return -EINVAL; + + addr_type = ipv6_addr_type(&addr->sin6_addr); + if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + return(-EINVAL); + + /* Check if the address belongs to the host. 
*/ + if (addr_type == IPV6_ADDR_MAPPED) { + v4addr = addr->sin6_addr.s6_addr32[3]; + if (inet_addr_type(v4addr) != RTN_LOCAL) + return(-EADDRNOTAVAIL); + } else { + if (addr_type != IPV6_ADDR_ANY) { + /* ipv4 addr of the socket is invalid. Only the + * unpecified and mapped address have a v4 equivalent. + */ + v4addr = LOOPBACK4_IPV6; + if (!(addr_type & IPV6_ADDR_MULTICAST)) { + if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL) + return(-EADDRNOTAVAIL); + } + } + } + + sk->rcv_saddr = v4addr; + sk->saddr = v4addr; + + memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + + if (!(addr_type & IPV6_ADDR_MULTICAST)) + memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + + snum = ntohs(addr->sin6_port); + if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + return(-EACCES); + + /* Make sure we are allowed to bind here. */ + if(sk->prot->get_port(sk, snum) != 0) + return -EADDRINUSE; + + sk->sport = ntohs(sk->num); + sk->dport = 0; + sk->daddr = 0; + sk->prot->hash(sk); + add_to_prot_sklist(sk); + + return(0); +} + +static int inet6_release(struct socket *sock, struct socket *peer) +{ + struct sock *sk = sock->sk; + + if (sk == NULL) + return -EINVAL; + + /* Free mc lists */ + ipv6_sock_mc_close(sk); + + /* Huh! MOD_DEC_USE_COUNT was here :-( + It is impossible by two reasons: socket destroy + may be delayed and inet_release may sleep and + return to nowhere then. It should be moved to + inet6_destroy_sock(), but we have no explicit constructor :-( + --ANK (980802) + */ + MOD_DEC_USE_COUNT; + return inet_release(sock, peer); +} + +int inet6_destroy_sock(struct sock *sk) +{ + struct sk_buff *skb; + struct ipv6_txoptions *opt; + + /* + * Release destination entry + */ + + dst_release(xchg(&sk->dst_cache,NULL)); + + /* Release rx options */ + + if ((skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL)) != NULL) + kfree_skb(skb); + + /* Free flowlabels */ + fl6_free_socklist(sk); + + /* Free tx options */ + + if ((opt = xchg(&sk->net_pinfo.af_inet6.opt, NULL)) != NULL) + sock_kfree_s(sk, opt, opt->tot_len); + + return 0; +} + +/* + * This does both peername and sockname. 
+ */ + +static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; + struct sock *sk; + + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + sk = sock->sk; + if (peer) { + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sin->sin6_port = sk->dport; + memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr, + sizeof(struct in6_addr)); + if (sk->net_pinfo.af_inet6.sndflow) + sin->sin6_flowinfo = sk->net_pinfo.af_inet6.flow_label; + } else { + if (ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr) == IPV6_ADDR_ANY) + memcpy(&sin->sin6_addr, + &sk->net_pinfo.af_inet6.saddr, + sizeof(struct in6_addr)); + else + memcpy(&sin->sin6_addr, + &sk->net_pinfo.af_inet6.rcv_saddr, + sizeof(struct in6_addr)); + + sin->sin6_port = sk->sport; + } + *uaddr_len = sizeof(*sin); + return(0); +} + +static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err = -EINVAL; + int pid; + + switch(cmd) + { + case FIOSETOWN: + case SIOCSPGRP: + err = get_user(pid, (int *) arg); + if(err) + return err; + + /* see sock_no_fcntl */ + if (current->pid != pid && current->pgrp != -pid && + !capable(CAP_NET_ADMIN)) + return -EPERM; + sk->proc = pid; + return(0); + case FIOGETOWN: + case SIOCGPGRP: + err = put_user(sk->proc,(int *)arg); + if(err) + return err; + return(0); + case SIOCGSTAMP: + if(sk->stamp.tv_sec==0) + return -ENOENT; + err = copy_to_user((void *)arg, &sk->stamp, + sizeof(struct timeval)); + if (err) + return -EFAULT; + return 0; + + case SIOCADDRT: + case SIOCDELRT: + + return(ipv6_route_ioctl(cmd,(void *)arg)); + + case SIOCSIFADDR: + return addrconf_add_ifaddr((void *) arg); + case SIOCDIFADDR: + return addrconf_del_ifaddr((void *) arg); + case SIOCSIFDSTADDR: + return addrconf_set_dstaddr((void *) arg); + default: + if ((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) + return(dev_ioctl(cmd,(void *) arg)); + + if(sk->prot->ioctl==0 || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD) + return(dev_ioctl(cmd,(void *) arg)); + return err; + } + /*NOTREACHED*/ + return(0); +} + +struct proto_ops inet6_stream_ops = { + PF_INET6, + + sock_no_dup, + inet6_release, + inet6_bind, + inet_stream_connect, /* ok */ + sock_no_socketpair, /* a do nothing */ + inet_accept, /* ok */ + inet6_getname, + inet_poll, /* ok */ + inet6_ioctl, /* must change */ + inet_listen, /* ok */ + inet_shutdown, /* ok */ + inet_setsockopt, /* ok */ + inet_getsockopt, /* ok */ + sock_no_fcntl, /* ok */ + inet_sendmsg, /* ok */ + inet_recvmsg /* ok */ +}; + +struct proto_ops inet6_dgram_ops = { + PF_INET6, + + sock_no_dup, + inet6_release, + inet6_bind, + inet_dgram_connect, /* ok */ + sock_no_socketpair, /* a do nothing */ + inet_accept, /* ok */ + inet6_getname, + datagram_poll, /* ok */ + inet6_ioctl, /* must change */ + sock_no_listen, /* ok */ + inet_shutdown, /* ok */ + inet_setsockopt, /* ok */ + inet_getsockopt, /* ok */ + sock_no_fcntl, /* ok */ + inet_sendmsg, /* ok */ + inet_recvmsg /* ok */ +}; + +struct net_proto_family inet6_family_ops = { + PF_INET6, + inet6_create +}; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_raw6 = { + PROC_NET_RAW6, 4, "raw6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + raw6_get_info +}; +static struct proc_dir_entry proc_net_tcp6 = { + PROC_NET_TCP6, 4, "tcp6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + tcp6_get_info +}; +static struct proc_dir_entry proc_net_udp6 = 
{ + PROC_NET_RAW6, 4, "udp6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + udp6_get_info +}; +static struct proc_dir_entry proc_net_sockstat6 = { + PROC_NET_SOCKSTAT6, 9, "sockstat6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + afinet6_get_info +}; +static struct proc_dir_entry proc_net_snmp6 = { + PROC_NET_SNMP6, 5, "snmp6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + afinet6_get_snmp +}; +#endif /* CONFIG_PROC_FS */ + +#ifdef MODULE +int ipv6_unload(void) +{ + if (!unloadable) return 1; + /* We keep internally 3 raw sockets */ + return atomic_read(&(__this_module.uc.usecount)) - 3; +} +#endif + +#if defined(MODULE) && defined(CONFIG_SYSCTL) +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + +#ifdef MODULE +int init_module(void) +#else +__initfunc(void inet6_proto_init(struct net_proto *pro)) +#endif +{ + struct sk_buff *dummy_skb; + int err; + +#ifdef MODULE + if (!mod_member_present(&__this_module, can_unload)) + return -EINVAL; + + __this_module.can_unload = &ipv6_unload; +#endif + + printk(KERN_INFO "IPv6 v0.8 for NET4.0\n"); + + if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) + { + printk(KERN_CRIT "inet6_proto_init: size fault\n"); +#ifdef MODULE + return -EINVAL; +#else + return; +#endif + } + + /* + * ipngwg API draft makes clear that the correct semantics + * for TCP and UDP is to consider one TCP and UDP instance + * in a host availiable by both INET and INET6 APIs and + * able to communicate via both network protocols. + */ + +#if defined(MODULE) && defined(CONFIG_SYSCTL) + ipv6_sysctl_register(); +#endif + err = icmpv6_init(&inet6_family_ops); + if (err) + goto icmp_fail; + err = ndisc_init(&inet6_family_ops); + if (err) + goto ndisc_fail; + err = igmp6_init(&inet6_family_ops); + if (err) + goto igmp_fail; + ipv6_netdev_notif_init(); + ipv6_packet_init(); + ip6_route_init(); + ip6_flowlabel_init(); + addrconf_init(); + sit_init(); + + /* Init v6 transport protocols. */ + udpv6_init(); + tcpv6_init(); + + /* Create /proc/foo6 entries. */ +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_raw6); + proc_net_register(&proc_net_tcp6); + proc_net_register(&proc_net_udp6); + proc_net_register(&proc_net_sockstat6); + proc_net_register(&proc_net_snmp6); +#endif + + /* Now the userspace is allowed to create INET6 sockets. */ + (void) sock_register(&inet6_family_ops); + +#ifdef MODULE + return 0; +#else + return; +#endif + +igmp_fail: + ndisc_cleanup(); +ndisc_fail: + icmpv6_cleanup(); +icmp_fail: +#if defined(MODULE) && defined(CONFIG_SYSCTL) + ipv6_sysctl_unregister(); +#endif +#ifdef MODULE + return err; +#else + return; +#endif +} + +#ifdef MODULE +void cleanup_module(void) +{ + /* First of all disallow new sockets creation. */ + sock_unregister(PF_INET6); +#ifdef CONFIG_PROC_FS + proc_net_unregister(proc_net_raw6.low_ino); + proc_net_unregister(proc_net_tcp6.low_ino); + proc_net_unregister(proc_net_udp6.low_ino); + proc_net_unregister(proc_net_sockstat6.low_ino); + proc_net_unregister(proc_net_snmp6.low_ino); +#endif + /* Cleanup code parts. 
*/ + sit_cleanup(); + ipv6_netdev_notif_cleanup(); + ip6_flowlabel_cleanup(); + addrconf_cleanup(); + ip6_route_cleanup(); + ipv6_packet_cleanup(); + igmp6_cleanup(); + ndisc_cleanup(); + icmpv6_cleanup(); +#ifdef CONFIG_SYSCTL + ipv6_sysctl_unregister(); +#endif +} +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv6/datagram_ipv6.c b/pfinet/linux-src/net/ipv6/datagram_ipv6.c new file mode 100644 index 00000000..af38fbcb --- /dev/null +++ b/pfinet/linux-src/net/ipv6/datagram_ipv6.c @@ -0,0 +1,426 @@ +/* + * common UDP/RAW code + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: datagram_ipv6.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/route.h> + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> +#include <net/transp_v6.h> + +#include <linux/errqueue.h> +#include <asm/uaccess.h> + +void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + u16 port, u32 info, u8 *payload) +{ + struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw; + struct sock_exterr_skb *serr; + + if (!sk->net_pinfo.af_inet6.recverr) + return; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return; + + serr = SKB_EXT_ERR(skb); + serr->ee.ee_errno = err; + serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6; + serr->ee.ee_type = icmph->icmp6_type; + serr->ee.ee_code = icmph->icmp6_code; + serr->ee.ee_pad = 0; + serr->ee.ee_info = info; + serr->ee.ee_data = 0; + serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw; + serr->port = port; + + skb->h.raw = payload; + skb_pull(skb, payload - skb->data); + + if (sock_queue_err_skb(sk, skb)) + kfree_skb(skb); +} + +void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) +{ + struct sock_exterr_skb *serr; + struct ipv6hdr *iph; + struct sk_buff *skb; + + if (!sk->net_pinfo.af_inet6.recverr) + return; + + skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); + if (!skb) + return; + + iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr)); + skb->nh.ipv6h = iph; + memcpy(&iph->daddr, fl->fl6_dst, 16); + + serr = SKB_EXT_ERR(skb); + serr->ee.ee_errno = err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_type = 0; + serr->ee.ee_code = 0; + serr->ee.ee_pad = 0; + serr->ee.ee_info = info; + serr->ee.ee_data = 0; + serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw; + serr->port = fl->uli_u.ports.dport; + + skb->h.raw = skb->tail; + skb_pull(skb, skb->tail - skb->data); + + if (sock_queue_err_skb(sk, skb)) + kfree_skb(skb); +} + +/* + * Handle MSG_ERRQUEUE + */ +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + struct sock_exterr_skb *serr; + struct sk_buff *skb, *skb2; + struct sockaddr_in6 *sin; + struct { + struct sock_extended_err ee; + struct sockaddr_in6 offender; + } errhdr; + int err; + int copied; + + err = -EAGAIN; + skb = skb_dequeue(&sk->error_queue); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = 
memcpy_toiovec(msg->msg_iov, skb->data, copied); + if (err) + goto out_free_skb; + + serr = SKB_EXT_ERR(skb); + + sin = (struct sockaddr_in6 *)msg->msg_name; + if (sin) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + sin->sin6_port = serr->port; + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + memcpy(&sin->sin6_addr, skb->nh.raw + serr->addr_offset, 16); + if (sk->net_pinfo.af_inet6.sndflow) + sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK; + } else + ipv6_addr_set(&sin->sin6_addr, 0, 0, + __constant_htonl(0xffff), + *(u32*)(skb->nh.raw + serr->addr_offset)); + } + + memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); + sin = &errhdr.offender; + sin->sin6_family = AF_UNSPEC; + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + memcpy(&sin->sin6_addr, &skb->nh.ipv6h->saddr, 16); + if (sk->net_pinfo.af_inet6.rxopt.all) + datagram_recv_ctl(sk, msg, skb); + } else { + ipv6_addr_set(&sin->sin6_addr, 0, 0, + __constant_htonl(0xffff), + skb->nh.iph->saddr); + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); + } + } + + put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr); + + /* Now we could try to dump offended packet options */ + + msg->msg_flags |= MSG_ERRQUEUE; + err = copied; + + /* Reset and regenerate socket error */ + sk->err = 0; + if ((skb2 = skb_peek(&sk->error_queue)) != NULL) { + sk->err = SKB_EXT_ERR(skb2)->ee.ee_errno; + sk->error_report(sk); + } + +out_free_skb: + kfree_skb(skb); +out: + return err; +} + + + +int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb; + + if (np->rxopt.bits.rxinfo) { + struct in6_pktinfo src_info; + + src_info.ipi6_ifindex = opt->iif; + ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + } + + if (np->rxopt.bits.rxhlim) { + int hlim = skb->nh.ipv6h->hop_limit; + put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); + } + + if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { + u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; + put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + } + if (np->rxopt.bits.hopopts && opt->hop) { + u8 *ptr = skb->nh.raw + opt->hop; + put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.dstopts && opt->dst0) { + u8 *ptr = skb->nh.raw + opt->dst0; + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.srcrt && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt); + put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + } + if (np->rxopt.bits.authhdr && opt->auth) { + u8 *ptr = skb->nh.raw + opt->auth; + put_cmsg(msg, SOL_IPV6, IPV6_AUTHHDR, (ptr[1]+1)<<2, ptr); + } + if (np->rxopt.bits.dstopts && opt->dst1) { + u8 *ptr = skb->nh.raw + opt->dst1; + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + } + return 0; +} + +int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, + struct ipv6_txoptions *opt, + int *hlimit) +{ + struct in6_pktinfo *src_info; + struct cmsghdr *cmsg; + struct ipv6_rt_hdr *rthdr; + struct ipv6_opt_hdr *hdr; + int len; + int err = 0; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if 
(cmsg->cmsg_len < sizeof(struct cmsghdr) || + (unsigned long)(((char*)cmsg - (char*)msg->msg_control) + + cmsg->cmsg_len) > msg->msg_controllen) { + err = -EINVAL; + goto exit_f; + } + + if (cmsg->cmsg_level != SOL_IPV6) + continue; + + switch (cmsg->cmsg_type) { + case IPV6_PKTINFO: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { + err = -EINVAL; + goto exit_f; + } + + src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); + + if (src_info->ipi6_ifindex) { + if (fl->oif && src_info->ipi6_ifindex != fl->oif) + return -EINVAL; + fl->oif = src_info->ipi6_ifindex; + } + + if (!ipv6_addr_any(&src_info->ipi6_addr)) { + struct inet6_ifaddr *ifp; + + ifp = ipv6_chk_addr(&src_info->ipi6_addr, NULL, 0); + + if (ifp == NULL) { + err = -EINVAL; + goto exit_f; + } + + fl->fl6_src = &src_info->ipi6_addr; + } + + break; + + case IPV6_FLOWINFO: + if (cmsg->cmsg_len < CMSG_LEN(4)) { + err = -EINVAL; + goto exit_f; + } + + if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { + if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { + err = -EINVAL; + goto exit_f; + } + } + fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); + break; + + case IPV6_HOPOPTS: + if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto exit_f; + } + opt->opt_nflen += len; + opt->hopopt = hdr; + break; + + case IPV6_DSTOPTS: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto exit_f; + } + if (opt->dst1opt) { + err = -EINVAL; + goto exit_f; + } + opt->opt_flen += len; + opt->dst1opt = hdr; + break; + + case IPV6_AUTHHDR: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 2) << 2); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (len & ~7) { + err = -EINVAL; + goto exit_f; + } + opt->opt_flen += len; + opt->auth = hdr; + break; + + case IPV6_RTHDR: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); + + /* + * TYPE 0 + */ + if (rthdr->type) { + err = -EINVAL; + goto exit_f; + } + + len = ((rthdr->hdrlen + 1) << 3); + + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + + /* segments left must also match */ + if ((rthdr->hdrlen >> 1) != rthdr->segments_left) { + err = -EINVAL; + goto exit_f; + } + + opt->opt_nflen += len; + opt->srcrt = rthdr; + + if (opt->dst1opt) { + int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); + + opt->opt_nflen += dsthdrlen; + opt->dst0opt = opt->dst1opt; + opt->dst1opt = NULL; + opt->opt_flen -= dsthdrlen; + } + + break; + + case IPV6_HOPLIMIT: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + err = -EINVAL; + goto exit_f; + } + + *hlimit = *(int *)CMSG_DATA(cmsg); + break; + + default: + printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); + err = -EINVAL; + break; + }; + } + +exit_f: + return err; +} diff --git a/pfinet/linux-src/net/ipv6/exthdrs.c 
b/pfinet/linux-src/net/ipv6/exthdrs.c new file mode 100644 index 00000000..a3d3dfe7 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/exthdrs.c @@ -0,0 +1,771 @@ +/* + * Extension Header handling for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * Andi Kleen <ak@muc.de> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * + * $Id: exthdrs.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <asm/uaccess.h> + +/* + * Parsing inbound headers. + * + * Parsing function "func" returns pointer to the place, + * where next nexthdr value is stored or NULL, if parsing + * failed. It should also update skb->h. + */ + +struct hdrtype_proc +{ + int type; + u8* (*func) (struct sk_buff **, u8 *ptr); +}; + +/* + * Parsing tlv encoded headers. + * + * Parsing function "func" returns 1, if parsing succeed + * and 0, if it failed. + * It MUST NOT touch skb->h. + */ + +struct tlvtype_proc +{ + int type; + int (*func) (struct sk_buff *, __u8 *ptr); +}; + +/********************* + Generic functions + *********************/ + +/* An unknown option is detected, decide what to do */ + +int ip6_tlvopt_unknown(struct sk_buff *skb, u8 *opt) +{ + switch ((opt[0] & 0xC0) >> 6) { + case 0: /* ignore */ + return 1; + + case 1: /* drop packet */ + break; + + case 3: /* Send ICMP if not a multicast address and drop packet */ + /* Actually, it is redundant check. icmp_send + will recheck in any case. + */ + if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) + break; + case 2: /* send ICMP PARM PROB regardless and drop packet */ + icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, opt); + return 0; + }; + + kfree_skb(skb); + return 0; +} + +/* Parse tlv encoded option header (hop-by-hop or destination) */ + +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb, + __u8 *nhptr) +{ + struct tlvtype_proc *curr; + u8 *ptr = skb->h.raw; + int len = ((ptr[1]+1)<<3) - 2; + + ptr += 2; + + if (skb->tail - (ptr + len) < 0) { + kfree_skb(skb); + return 0; + } + + while (len > 0) { + int optlen = ptr[1]+2; + + switch (ptr[0]) { + case IPV6_TLV_PAD0: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + default: /* Other TLV code so scan list */ + for (curr=procs; curr->type >= 0; curr++) { + if (curr->type == ptr[0]) { + if (curr->func(skb, ptr) == 0) + return 0; + break; + } + } + if (curr->type < 0) { + if (ip6_tlvopt_unknown(skb, ptr) == 0) + return 0; + } + break; + } + ptr += optlen; + len -= optlen; + } + if (len == 0) + return 1; + kfree_skb(skb); + return 0; +} + +/***************************** + Destination options header. 
+ *****************************/ + +struct tlvtype_proc tlvprocdestopt_lst[] = { + /* No destination options are defined now */ + {-1, NULL} +}; + +static u8 *ipv6_dest_opt(struct sk_buff **skb_ptr, u8 *nhptr) +{ + struct sk_buff *skb=*skb_ptr; + struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw; + + opt->dst1 = (u8*)hdr - skb->nh.raw; + + if (ip6_parse_tlv(tlvprocdestopt_lst, skb, nhptr)) { + skb->h.raw += ((hdr->hdrlen+1)<<3); + return &hdr->nexthdr; + } + + return NULL; +} + +/******************************** + NONE header. No data in packet. + ********************************/ + +static u8 *ipv6_nodata(struct sk_buff **skb_ptr, u8 *nhptr) +{ + kfree_skb(*skb_ptr); + return NULL; +} + +/******************************** + Routing header. + ********************************/ + +static u8* ipv6_routing_header(struct sk_buff **skb_ptr, u8 *nhptr) +{ + struct sk_buff *skb = *skb_ptr; + struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + struct in6_addr *addr; + struct in6_addr daddr; + int addr_type; + int n, i; + + struct ipv6_rt_hdr *hdr = (struct ipv6_rt_hdr *) skb->h.raw; + struct rt0_hdr *rthdr; + + if (((hdr->hdrlen+1)<<3) > skb->tail - skb->h.raw) { + ipv6_statistics.Ip6InHdrErrors++; + kfree_skb(skb); + return NULL; + } + +looped_back: + if (hdr->segments_left == 0) { + opt->srcrt = (u8*)hdr - skb->nh.raw; + skb->h.raw += (hdr->hdrlen + 1) << 3; + opt->dst0 = opt->dst1; + opt->dst1 = 0; + return &hdr->nexthdr; + } + + if (hdr->type != IPV6_SRCRT_TYPE_0 || hdr->hdrlen & 0x01) { + u8 *pos = (u8*) hdr; + + if (hdr->type != IPV6_SRCRT_TYPE_0) + pos += 2; + else + pos += 1; + + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, pos); + return NULL; + } + + /* + * This is the routing header forwarding algorithm from + * RFC 1883, page 17. + */ + + n = hdr->hdrlen >> 1; + + if (hdr->segments_left > n) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, &hdr->segments_left); + return NULL; + } + + /* We are about to mangle packet header. Be careful! + Do not damage packets queued somewhere. + */ + if (skb_cloned(skb)) { + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + kfree_skb(skb); + if (skb2 == NULL) + return NULL; + *skb_ptr = skb = skb2; + opt = (struct inet6_skb_parm *)skb2->cb; + hdr = (struct ipv6_rt_hdr *) skb2->h.raw; + } + + i = n - --hdr->segments_left; + + rthdr = (struct rt0_hdr *) hdr; + addr = rthdr->addr; + addr += i - 1; + + addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_MULTICAST) { + kfree_skb(skb); + return NULL; + } + + ipv6_addr_copy(&daddr, addr); + ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr); + ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr); + + dst_release(xchg(&skb->dst, NULL)); + ip6_route_input(skb); + if (skb->dst->error) { + skb->dst->input(skb); + return NULL; + } + if (skb->dst->dev->flags&IFF_LOOPBACK) { + if (skb->nh.ipv6h->hop_limit <= 1) { + icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, + 0, skb->dev); + kfree_skb(skb); + return NULL; + } + skb->nh.ipv6h->hop_limit--; + goto looped_back; + } + + skb->dst->input(skb); + return NULL; +} + +/* + This function inverts received rthdr. + NOTE: specs allow to make it automatically only if + packet authenticated. + + I will not discuss it here (though, I am really pissed off at + this stupid requirement making rthdr idea useless) + + Actually, it creates severe problems for us. 
+ Embrionic requests has no associated sockets, + so that user have no control over it and + cannot not only to set reply options, but + even to know, that someone wants to connect + without success. :-( + + For now we need to test the engine, so that I created + temporary (or permanent) backdoor. + If listening socket set IPV6_RTHDR to 2, then we invert header. + --ANK (980729) + */ + +struct ipv6_txoptions * +ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) +{ + /* Received rthdr: + + [ H1 -> H2 -> ... H_prev ] daddr=ME + + Inverted result: + [ H_prev -> ... -> H1 ] daddr =sender + + Note, that IP output engine will rewrire this rthdr + by rotating it left by one addr. + */ + + int n, i; + struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr; + struct rt0_hdr *irthdr; + struct ipv6_txoptions *opt; + int hdrlen = ipv6_optlen(hdr); + + if (hdr->segments_left || + hdr->type != IPV6_SRCRT_TYPE_0 || + hdr->hdrlen & 0x01) + return NULL; + + n = hdr->hdrlen >> 1; + opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC); + if (opt == NULL) + return NULL; + memset(opt, 0, sizeof(*opt)); + opt->tot_len = sizeof(*opt) + hdrlen; + opt->srcrt = (void*)(opt+1); + opt->opt_nflen = hdrlen; + + memcpy(opt->srcrt, hdr, sizeof(*hdr)); + irthdr = (struct rt0_hdr*)opt->srcrt; + /* Obsolete field, MBZ, when originated by us */ + irthdr->bitmap = 0; + opt->srcrt->segments_left = n; + for (i=0; i<n; i++) + memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16); + return opt; +} + +/******************************** + AUTH header. + ********************************/ + +/* + rfc1826 said, that if a host does not implement AUTH header + it MAY ignore it. We use this hole 8) + + Actually, now we can implement OSPFv6 without kernel IPsec. + Authentication for poors may be done in user space with the same success. + + Yes, it means, that we allow application to send/receive + raw authentication header. Apparently, we suppose, that it knows + what it does and calculates authentication data correctly. + Certainly, it is possible only for udp and raw sockets, but not for tcp. + + AUTH header has 4byte granular length, which kills all the idea + behind AUTOMATIC 64bit alignment of IPv6. Now we will loose + cpu ticks, checking that sender did not something stupid + and opt->hdrlen is even. Shit! --ANK (980730) + */ + +static u8 *ipv6_auth_hdr(struct sk_buff **skb_ptr, u8 *nhptr) +{ + struct sk_buff *skb=*skb_ptr; + struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + struct ipv6_opt_hdr *hdr = (struct ipv6_opt_hdr *)skb->h.raw; + int len = (hdr->hdrlen+2)<<2; + + if (len&7) + return NULL; + opt->auth = (u8*)hdr - skb->nh.raw; + if (skb->h.raw + len > skb->tail) + return NULL; + skb->h.raw += len; + return &hdr->nexthdr; +} + +/* This list MUST NOT contain entry for NEXTHDR_HOP. + It is parsed immediately after packet received + and if it occurs somewhere in another place we must + generate error. 
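+
+   For example, a received chain IPv6|DSTOPT|TCP is walked by
+   ipv6_parse_exthdrs() below as NEXTHDR_DEST -> ipv6_dest_opt, which
+   returns the address of the dest-opt header's nexthdr byte; TCP has
+   no entry in this list, so the loop stops and that pointer is handed
+   back for transport demux.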
+ */ + +struct hdrtype_proc hdrproc_lst[] = { + {NEXTHDR_FRAGMENT, ipv6_reassembly}, + {NEXTHDR_ROUTING, ipv6_routing_header}, + {NEXTHDR_DEST, ipv6_dest_opt}, + {NEXTHDR_NONE, ipv6_nodata}, + {NEXTHDR_AUTH, ipv6_auth_hdr}, + /* + {NEXTHDR_ESP, ipv6_esp_hdr}, + */ + {-1, NULL} +}; + +u8 *ipv6_parse_exthdrs(struct sk_buff **skb_in, u8 *nhptr) +{ + struct hdrtype_proc *hdrt; + u8 nexthdr = *nhptr; + +restart: + for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) { + if (hdrt->type == nexthdr) { + if ((nhptr = hdrt->func(skb_in, nhptr)) != NULL) { + nexthdr = *nhptr; + goto restart; + } + return NULL; + } + } + return nhptr; +} + + +/********************************** + Hop-by-hop options. + **********************************/ + +/* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */ + +static int ipv6_hop_ra(struct sk_buff *skb, u8 *ptr) +{ + if (ptr[1] == 2) { + ((struct inet6_skb_parm*)skb->cb)->ra = ptr - skb->nh.raw; + return 1; + } + if (net_ratelimit()) + printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", ptr[1]); + kfree_skb(skb); + return 0; +} + +/* Jumbo payload */ + +static int ipv6_hop_jumbo(struct sk_buff *skb, u8 *ptr) +{ + u32 pkt_len; + + if (ptr[1] != 4 || ((ptr-skb->nh.raw)&3) != 2) { + if (net_ratelimit()) + printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", ptr[1]); + goto drop; + } + + pkt_len = ntohl(*(u32*)(ptr+2)); + if (pkt_len < 0x10000) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ptr+2); + return 0; + } + if (skb->nh.ipv6h->payload_len) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ptr); + return 0; + } + + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { + ipv6_statistics.Ip6InTruncatedPkts++; + goto drop; + } + skb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); + return 1; + +drop: + kfree_skb(skb); + return 0; +} + +struct tlvtype_proc tlvprochopopt_lst[] = { + {IPV6_TLV_ROUTERALERT, ipv6_hop_ra}, + {IPV6_TLV_JUMBO, ipv6_hop_jumbo}, + {-1, NULL} +}; + +u8 * ipv6_parse_hopopts(struct sk_buff *skb, u8 *nhptr) +{ + ((struct inet6_skb_parm*)skb->cb)->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb, nhptr)) + return nhptr+((nhptr[1]+1)<<3); + return NULL; +} + +/* + * Creating outbound headers. + * + * "build" functions work when skb is filled from head to tail (datagram) + * "push" functions work when headers are added from tail to head (tcp) + * + * In both cases we assume, that caller reserved enough room + * for headers. 
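+ *
+ * For example, with hopopt, dst0opt and srcrt all set in
+ * ipv6_txoptions, a datagram emits them head to tail via
+ * ipv6_build_nfrag_opts() (skb_put), while tcp pushes them tail to
+ * head via ipv6_push_nfrag_opts() (skb_push); both end up with the
+ * same wire order HOP, DST0, RTHDR in front of the transport header.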
+ */ + +u8 *ipv6_build_rthdr(struct sk_buff *skb, u8 *prev_hdr, + struct ipv6_rt_hdr *opt, struct in6_addr *addr) +{ + struct rt0_hdr *phdr, *ihdr; + int hops; + + ihdr = (struct rt0_hdr *) opt; + + phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); + memcpy(phdr, ihdr, sizeof(struct rt0_hdr)); + + hops = ihdr->rt_hdr.hdrlen >> 1; + + if (hops > 1) + memcpy(phdr->addr, ihdr->addr + 1, + (hops - 1) * sizeof(struct in6_addr)); + + ipv6_addr_copy(phdr->addr + (hops - 1), addr); + + phdr->rt_hdr.nexthdr = *prev_hdr; + *prev_hdr = NEXTHDR_ROUTING; + return &phdr->rt_hdr.nexthdr; +} + +static u8 *ipv6_build_exthdr(struct sk_buff *skb, u8 *prev_hdr, u8 type, struct ipv6_opt_hdr *opt) +{ + struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, ipv6_optlen(opt)); + + memcpy(h, opt, ipv6_optlen(opt)); + h->nexthdr = *prev_hdr; + *prev_hdr = type; + return &h->nexthdr; +} + +static u8 *ipv6_build_authhdr(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_opt_hdr *opt) +{ + struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, (opt->hdrlen+2)<<2); + + memcpy(h, opt, (opt->hdrlen+2)<<2); + h->nexthdr = *prev_hdr; + *prev_hdr = NEXTHDR_AUTH; + return &h->nexthdr; +} + + +u8 *ipv6_build_nfrag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt, + struct in6_addr *daddr, u32 jumbolen) +{ + struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb->data; + + if (opt && opt->hopopt) + prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_HOP, opt->hopopt); + + if (jumbolen) { + u8 *jumboopt = (u8 *)skb_put(skb, 8); + + if (opt && opt->hopopt) { + *jumboopt++ = IPV6_TLV_PADN; + *jumboopt++ = 0; + h->hdrlen++; + } else { + h = (struct ipv6_opt_hdr *)jumboopt; + h->nexthdr = *prev_hdr; + h->hdrlen = 0; + jumboopt += 2; + *prev_hdr = NEXTHDR_HOP; + prev_hdr = &h->nexthdr; + } + jumboopt[0] = IPV6_TLV_JUMBO; + jumboopt[1] = 4; + *(u32*)(jumboopt+2) = htonl(jumbolen); + } + if (opt) { + if (opt->dst0opt) + prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst0opt); + if (opt->srcrt) + prev_hdr = ipv6_build_rthdr(skb, prev_hdr, opt->srcrt, daddr); + } + return prev_hdr; +} + +u8 *ipv6_build_frag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt) +{ + if (opt->auth) + prev_hdr = ipv6_build_authhdr(skb, prev_hdr, opt->auth); + if (opt->dst1opt) + prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst1opt); + return prev_hdr; +} + +static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p) +{ + struct rt0_hdr *phdr, *ihdr; + int hops; + + ihdr = (struct rt0_hdr *) opt; + + phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); + memcpy(phdr, ihdr, sizeof(struct rt0_hdr)); + + hops = ihdr->rt_hdr.hdrlen >> 1; + + if (hops > 1) + memcpy(phdr->addr, ihdr->addr + 1, + (hops - 1) * sizeof(struct in6_addr)); + + ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p); + *addr_p = ihdr->addr; + + phdr->rt_hdr.nexthdr = *proto; + *proto = NEXTHDR_ROUTING; +} + +static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) +{ + struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); + + memcpy(h, opt, ipv6_optlen(opt)); + h->nexthdr = *proto; + *proto = type; +} + +static void ipv6_push_authhdr(struct sk_buff *skb, u8 *proto, struct ipv6_opt_hdr *opt) +{ + struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, (opt->hdrlen+2)<<2); + + memcpy(h, opt, (opt->hdrlen+2)<<2); + h->nexthdr = *proto; + *proto = NEXTHDR_AUTH; 
+} + +void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 *proto, + struct in6_addr **daddr) +{ + if (opt->srcrt) + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + if (opt->dst0opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + if (opt->hopopt) + ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); +} + +void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) +{ + if (opt->dst1opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); + if (opt->auth) + ipv6_push_authhdr(skb, proto, opt->auth); +} + +struct ipv6_txoptions * +ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) +{ + struct ipv6_txoptions *opt2; + + opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); + if (opt2) { + long dif = (char*)opt2 - (char*)opt; + memcpy(opt2, opt, opt->tot_len); + if (opt2->hopopt) + *((char**)&opt2->hopopt) += dif; + if (opt2->dst0opt) + *((char**)&opt2->dst0opt) += dif; + if (opt2->dst1opt) + *((char**)&opt2->dst1opt) += dif; + if (opt2->auth) + *((char**)&opt2->auth) += dif; + if (opt2->srcrt) + *((char**)&opt2->srcrt) += dif; + } + return opt2; +} + + +/* + * find out if nexthdr is a well-known extension header or a protocol + */ + +static __inline__ int ipv6_ext_hdr(u8 nexthdr) +{ + /* + * find out if nexthdr is an extension header or a protocol + */ + return ( (nexthdr == NEXTHDR_HOP) || + (nexthdr == NEXTHDR_ROUTING) || + (nexthdr == NEXTHDR_FRAGMENT) || + (nexthdr == NEXTHDR_AUTH) || + (nexthdr == NEXTHDR_NONE) || + (nexthdr == NEXTHDR_DEST) ); +} + +/* + * Skip any extension headers. This is used by the ICMP module. + * + * Note that strictly speaking this conflicts with RFC1883 4.0: + * ...The contents and semantics of each extension header determine whether + * or not to proceed to the next header. Therefore, extension headers must + * be processed strictly in the order they appear in the packet; a + * receiver must not, for example, scan through a packet looking for a + * particular kind of extension header and process that header prior to + * processing all preceding ones. + * + * We do exactly this. This is a protocol bug. We can't decide after a + * seeing an unknown discard-with-error flavour TLV option if it's a + * ICMP error message or not (errors should never be send in reply to + * ICMP error messages). + * + * But I see no other way to do this. This might need to be reexamined + * when Linux implements ESP (and maybe AUTH) headers. + * --AK + * + * This function parses (probably truncated) exthdr set "hdr" + * of length "len". "nexthdrp" initially points to some place, + * where type of the first header can be found. + * + * It skips all well-known exthdrs, and returns pointer to the start + * of unparsable area i.e. the first header with unknown type. + * If it is not NULL *nexthdr is updated by type/protocol of this header. + * + * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL. + * - it may return pointer pointing beyond end of packet, + * if the last recognized header is truncated in the middle. + * - if packet is truncated, so that all parsed headers are skipped, + * it returns NULL. + * - First fragment header is skipped, not-first ones + * are considered as unparsable. + * - ESP is unparsable for now and considered like + * normal payload protocol. + * - Note also special handling of AUTH header. Thanks to IPsec wizards. 
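+ *	  - Example: for a chain IPv6|HOP|DSTOPT|ESP|TCP the walk skips
+ *	    HOP and DSTOPT, stops at ESP (which ipv6_ext_hdr() does not
+ *	    recognize) and returns a pointer to the ESP header with
+ *	    *nexthdrp set to NEXTHDR_ESP.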
+ * + * --ANK (980726) + */ + +u8 *ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, u8 *nexthdrp, int len) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + int hdrlen; + + if (len < sizeof(struct ipv6_opt_hdr)) + return NULL; + if (nexthdr == NEXTHDR_NONE) + return NULL; + if (nexthdr == NEXTHDR_FRAGMENT) { + struct frag_hdr *fhdr = (struct frag_hdr *) hdr; + if (ntohs(fhdr->frag_off) & ~0x7) + break; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr->hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(hdr); + + nexthdr = hdr->nexthdr; + hdr = (struct ipv6_opt_hdr *) ((u8*)hdr + hdrlen); + len -= hdrlen; + } + + *nexthdrp = nexthdr; + return (u8*)hdr; +} + diff --git a/pfinet/linux-src/net/ipv6/icmpv6.c b/pfinet/linux-src/net/ipv6/icmpv6.c new file mode 100644 index 00000000..de5e7780 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/icmpv6.c @@ -0,0 +1,673 @@ +/* + * Internet Control Message Protocol (ICMPv6) + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: icmpv6.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * Based on net/ipv4/icmp.c + * + * RFC 1885 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Changes: + * + * Andi Kleen : exception handling + * Andi Kleen add rate limits. never reply to a icmp. + * add more length checks and other fixes. + */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/icmpv6.h> + +#include <net/ip.h> +#include <net/sock.h> + +#include <net/ipv6.h> +#include <net/checksum.h> +#include <net/protocol.h> +#include <net/raw.h> +#include <net/rawv6.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/icmp.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +struct icmpv6_mib icmpv6_statistics; + +/* + * ICMP socket for flow control. + */ + +struct socket *icmpv6_socket; + +int icmpv6_rcv(struct sk_buff *skb, unsigned long len); + +static struct inet6_protocol icmpv6_protocol = +{ + icmpv6_rcv, /* handler */ + NULL, /* error control */ + NULL, /* next */ + IPPROTO_ICMPV6, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "ICMPv6" /* name */ +}; + +struct icmpv6_msg { + struct icmp6hdr icmph; + __u8 *data; + struct in6_addr *daddr; + int len; + __u32 csum; +}; + + + +/* + * getfrag callback + */ + +static int icmpv6_getfrag(const void *data, struct in6_addr *saddr, + char *buff, unsigned int offset, unsigned int len) +{ + struct icmpv6_msg *msg = (struct icmpv6_msg *) data; + struct icmp6hdr *icmph; + __u32 csum; + + /* + * in theory offset must be 0 since we never send more + * than IPV6_MIN_MTU bytes on an error or more than the path mtu + * on an echo reply. 
(those are the rules on RFC 1883) + * + * Luckily, this statement is obsolete after + * draft-ietf-ipngwg-icmp-v2-00 --ANK (980730) + */ + + if (offset) { + csum = csum_partial_copy((void *) msg->data + + offset - sizeof(struct icmp6hdr), + buff, len, msg->csum); + msg->csum = csum; + return 0; + } + + csum = csum_partial_copy((void *) &msg->icmph, buff, + sizeof(struct icmp6hdr), msg->csum); + + csum = csum_partial_copy((void *) msg->data, + buff + sizeof(struct icmp6hdr), + len - sizeof(struct icmp6hdr), csum); + + icmph = (struct icmp6hdr *) buff; + + icmph->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr, msg->len, + IPPROTO_ICMPV6, csum); + return 0; +} + + +/* + * Slightly more convenient version of icmpv6_send. + */ +void icmpv6_param_prob(struct sk_buff *skb, int code, void *pos) +{ + int offset = (u8*)pos - (u8*)skb->nh.ipv6h; + + icmpv6_send(skb, ICMPV6_PARAMPROB, code, offset, skb->dev); + kfree_skb(skb); +} + +/* + * Figure out, may we reply to this packet with icmp error. + * + * We do not reply, if: + * - it was icmp error message. + * - it is truncated, so that it is known, that protocol is ICMPV6 + * (i.e. in the middle of some exthdr) + * - it is not the first fragment. BTW IPv6 specs say nothing about + * this case, but it is clear, that our reply would be useless + * for sender. + * + * --ANK (980726) + */ + +static int is_ineligible(struct ipv6hdr *hdr, int len) +{ + u8 *ptr; + __u8 nexthdr = hdr->nexthdr; + + if (len < (int)sizeof(*hdr)) + return 1; + + ptr = ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len - sizeof(*hdr)); + if (!ptr) + return 0; + if (nexthdr == IPPROTO_ICMPV6) { + struct icmp6hdr *ihdr = (struct icmp6hdr *)ptr; + return (ptr - (u8*)hdr) > len || !(ihdr->icmp6_type & 0x80); + } + return nexthdr == NEXTHDR_FRAGMENT; +} + +int sysctl_icmpv6_time = 1*HZ; + +/* + * Check the ICMP output rate limit + */ +static inline int icmpv6_xrlim_allow(struct sock *sk, int type, + struct flowi *fl) +{ + struct dst_entry *dst; + int res = 0; + + /* Informational messages are not limited. */ + if (type & 0x80) + return 1; + + /* Do not limit pmtu discovery, it would break it. */ + if (type == ICMPV6_PKT_TOOBIG) + return 1; + + /* + * Look up the output route. + * XXX: perhaps the expire for routing entries cloned by + * this lookup should be more aggressive (not longer than timeout). + */ + dst = ip6_route_output(sk, fl); + if (dst->error) { + ipv6_statistics.Ip6OutNoRoutes++; + } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { + res = 1; + } else { + struct rt6_info *rt = (struct rt6_info *)dst; + int tmo = sysctl_icmpv6_time; + + /* Give more bandwidth to wider prefixes. 
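+		   With the default sysctl_icmpv6_time == 1*HZ the
+		   resulting rate-limit timeout works out as:
+
+			plen 97..128  -> HZ	(no shift)
+			plen 65..96   -> HZ/2
+			plen 33..64   -> HZ/4
+			plen  1..32   -> HZ/8
+			plen  0       -> HZ/16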
*/ + if (rt->rt6i_dst.plen < 128) + tmo >>= ((128 - rt->rt6i_dst.plen)>>5); + + res = xrlim_allow(dst, tmo); + } + dst_release(dst); + return res; +} + +/* + * an inline helper for the "simple" if statement below + * checks if parameter problem report is caused by an + * unrecognized IPv6 option that has the Option Type + * highest-order two bits set to 10 + */ + +static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) +{ + u8 *buff = skb->nh.raw; + + return ( ( *(buff + offset) & 0xC0 ) == 0x80 ); +} + +/* + * Send an ICMP message in response to a packet in error + */ + +void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, + struct device *dev) +{ + struct ipv6hdr *hdr = skb->nh.ipv6h; + struct sock *sk = icmpv6_socket->sk; + struct in6_addr *saddr = NULL; + int iif = 0; + struct icmpv6_msg msg; + struct flowi fl; + int addr_type = 0; + int len; + + /* + * sanity check pointer in case of parameter problem + */ + + if (type == ICMPV6_PARAMPROB && + (info > (skb->tail - ((unsigned char *) hdr)))) { + printk(KERN_DEBUG "icmpv6_send: bug! pointer > skb\n"); + return; + } + + /* + * Make sure we respect the rules + * i.e. RFC 1885 2.4(e) + * Rule (e.1) is enforced by not using icmpv6_send + * in any code that processes icmp errors. + */ + + addr_type = ipv6_addr_type(&hdr->daddr); + + if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) + saddr = &hdr->daddr; + + /* + * Dest addr check + */ + + if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { + if (type != ICMPV6_PKT_TOOBIG && + !(type == ICMPV6_PARAMPROB && + code == ICMPV6_UNK_OPTION && + (opt_unrec(skb, info)))) + return; + + saddr = NULL; + } + + addr_type = ipv6_addr_type(&hdr->saddr); + + /* + * Source addr check + */ + + if (addr_type & IPV6_ADDR_LINKLOCAL) + iif = skb->dev->ifindex; + + /* + * Must not send if we know that source is Anycast also. + * for now we don't know that. + */ + if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { + printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); + return; + } + + /* + * Never answer to a ICMP packet. + */ + if (is_ineligible(hdr, (u8*)skb->tail - (u8*)hdr)) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmpv6_send: no reply to icmp error/fragment\n"); + return; + } + + fl.proto = IPPROTO_ICMPV6; + fl.nl_u.ip6_u.daddr = &hdr->saddr; + fl.nl_u.ip6_u.saddr = saddr; + fl.oif = iif; + fl.fl6_flowlabel = 0; + fl.uli_u.icmpt.type = type; + fl.uli_u.icmpt.code = code; + + if (!icmpv6_xrlim_allow(sk, type, &fl)) + return; + + /* + * ok. kick it. checksum will be provided by the + * getfrag_t callback. 
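+ *	The reply carries our icmp6hdr followed by as much of the
+ *	offending packet, from its IPv6 header on, as fits in
+ *	IPV6_MIN_MTU - sizeof(struct ipv6hdr) bytes.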
+ */ + + msg.icmph.icmp6_type = type; + msg.icmph.icmp6_code = code; + msg.icmph.icmp6_cksum = 0; + msg.icmph.icmp6_pointer = htonl(info); + + msg.data = skb->nh.raw; + msg.csum = 0; + msg.daddr = &hdr->saddr; + + len = min((skb->tail - ((unsigned char *) hdr)) + sizeof(struct icmp6hdr), + IPV6_MIN_MTU - sizeof(struct ipv6hdr)); + + if (len < 0) { + printk(KERN_DEBUG "icmp: len problem\n"); + return; + } + + msg.len = len; + + ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, + MSG_DONTWAIT); + if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) + (&icmpv6_statistics.Icmp6OutDestUnreachs)[type-1]++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +static void icmpv6_echo_reply(struct sk_buff *skb) +{ + struct sock *sk = icmpv6_socket->sk; + struct ipv6hdr *hdr = skb->nh.ipv6h; + struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; + struct in6_addr *saddr; + struct icmpv6_msg msg; + struct flowi fl; + unsigned char *data; + int len; + + data = (char *) (icmph + 1); + + saddr = &hdr->daddr; + + if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST) + saddr = NULL; + + len = skb->tail - data; + len += sizeof(struct icmp6hdr); + + msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY; + msg.icmph.icmp6_code = 0; + msg.icmph.icmp6_cksum = 0; + msg.icmph.icmp6_identifier = icmph->icmp6_identifier; + msg.icmph.icmp6_sequence = icmph->icmp6_sequence; + + msg.data = data; + msg.csum = 0; + msg.len = len; + msg.daddr = &hdr->saddr; + + fl.proto = IPPROTO_ICMPV6; + fl.nl_u.ip6_u.daddr = &hdr->saddr; + fl.nl_u.ip6_u.saddr = saddr; + fl.oif = skb->dev->ifindex; + fl.fl6_flowlabel = 0; + fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY; + fl.uli_u.icmpt.code = 0; + + ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, + MSG_DONTWAIT); + icmpv6_statistics.Icmp6OutEchoReplies++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +static void icmpv6_notify(struct sk_buff *skb, + int type, int code, u32 info, unsigned char *buff, int len) +{ + struct in6_addr *saddr = &skb->nh.ipv6h->saddr; + struct in6_addr *daddr = &skb->nh.ipv6h->daddr; + struct ipv6hdr *hdr = (struct ipv6hdr *) buff; + struct inet6_protocol *ipprot; + struct sock *sk; + u8 *pb; + int hash; + u8 nexthdr; + + nexthdr = hdr->nexthdr; + + len -= sizeof(struct ipv6hdr); + if (len < 0) + return; + + /* now skip over extension headers */ + pb = ipv6_skip_exthdr((struct ipv6_opt_hdr *) (hdr + 1), &nexthdr, len); + if (!pb) + return; + + /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. + Without this we will not able f.e. to make source routed + pmtu discovery. + Corresponding argument (opt) to notifiers is already added. 
+ --ANK (980726) + */ + + hash = nexthdr & (MAX_INET_PROTOS - 1); + + for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; + ipprot != NULL; + ipprot=(struct inet6_protocol *)ipprot->next) { + if (ipprot->protocol != nexthdr) + continue; + + if (ipprot->err_handler) + ipprot->err_handler(skb, hdr, NULL, type, code, pb, info); + } + + sk = raw_v6_htable[hash]; + + if (sk == NULL) + return; + + while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) { + rawv6_err(sk, skb, hdr, NULL, type, code, pb, info); + sk = sk->next; + } +} + +/* + * Handle icmp messages + */ + +int icmpv6_rcv(struct sk_buff *skb, unsigned long len) +{ + struct device *dev = skb->dev; + struct in6_addr *saddr = &skb->nh.ipv6h->saddr; + struct in6_addr *daddr = &skb->nh.ipv6h->daddr; + struct ipv6hdr *orig_hdr; + struct icmp6hdr *hdr = (struct icmp6hdr *) skb->h.raw; + int ulen; + int type; + + icmpv6_statistics.Icmp6InMsgs++; + + if (len < sizeof(struct icmp6hdr)) + goto discard_it; + + /* Perform checksum. */ + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)hdr, len, 0); + case CHECKSUM_HW: + if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, + skb->csum)) { + printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + ntohs(saddr->in6_u.u6_addr16[0]), + ntohs(saddr->in6_u.u6_addr16[1]), + ntohs(saddr->in6_u.u6_addr16[2]), + ntohs(saddr->in6_u.u6_addr16[3]), + ntohs(saddr->in6_u.u6_addr16[4]), + ntohs(saddr->in6_u.u6_addr16[5]), + ntohs(saddr->in6_u.u6_addr16[6]), + ntohs(saddr->in6_u.u6_addr16[7]), + ntohs(daddr->in6_u.u6_addr16[0]), + ntohs(daddr->in6_u.u6_addr16[1]), + ntohs(daddr->in6_u.u6_addr16[2]), + ntohs(daddr->in6_u.u6_addr16[3]), + ntohs(daddr->in6_u.u6_addr16[4]), + ntohs(daddr->in6_u.u6_addr16[5]), + ntohs(daddr->in6_u.u6_addr16[6]), + ntohs(daddr->in6_u.u6_addr16[7])); + goto discard_it; + } + default: + /* CHECKSUM_UNNECESSARY */ + }; + + /* + * length of original packet carried in skb + */ + ulen = skb->tail - (unsigned char *) (hdr + 1); + + type = hdr->icmp6_type; + + if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) + (&icmpv6_statistics.Icmp6InDestUnreachs)[type-ICMPV6_DEST_UNREACH]++; + else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT) + (&icmpv6_statistics.Icmp6InEchos)[type-ICMPV6_ECHO_REQUEST]++; + + switch (type) { + + case ICMPV6_ECHO_REQUEST: + icmpv6_echo_reply(skb); + break; + + case ICMPV6_ECHO_REPLY: + /* we coulnd't care less */ + break; + + case ICMPV6_PKT_TOOBIG: + /* BUGGG_FUTURE: if packet contains rthdr, we cannot update + standard destination cache. 
Seems, only "advanced" + destination cache will allow to solve this problem + --ANK (980726) + */ + orig_hdr = (struct ipv6hdr *) (hdr + 1); + if (ulen >= sizeof(struct ipv6hdr)) + rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, + ntohl(hdr->icmp6_mtu)); + + /* + * Drop through to notify + */ + + case ICMPV6_DEST_UNREACH: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu, + (char *) (hdr + 1), ulen); + break; + + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + ndisc_rcv(skb, len); + break; + + case ICMPV6_MGM_QUERY: + igmp6_event_query(skb, hdr, len); + break; + + case ICMPV6_MGM_REPORT: + igmp6_event_report(skb, hdr, len); + break; + + case ICMPV6_MGM_REDUCTION: + break; + + default: + if (net_ratelimit()) + printk(KERN_DEBUG "icmpv6: msg of unkown type\n"); + + /* informational */ + if (type & 0x80) + break; + + /* + * error of unkown type. + * must pass to upper level + */ + + icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu, + (char *) (hdr + 1), ulen); + }; + kfree_skb(skb); + return 0; + +discard_it: + icmpv6_statistics.Icmp6InErrors++; + kfree_skb(skb); + return 0; +} + +int __init icmpv6_init(struct net_proto_family *ops) +{ + struct sock *sk; + int err; + + icmpv6_socket = sock_alloc(); + if (icmpv6_socket == NULL) { + printk(KERN_ERR + "Failed to create the ICMP6 control socket.\n"); + return -1; + } + icmpv6_socket->inode->i_uid = 0; + icmpv6_socket->inode->i_gid = 0; + icmpv6_socket->type = SOCK_RAW; + + if ((err = ops->create(icmpv6_socket, IPPROTO_ICMPV6)) < 0) { + printk(KERN_ERR + "Failed to initialize the ICMP6 control socket (err %d).\n", + err); + sock_release(icmpv6_socket); + icmpv6_socket = NULL; /* for safety */ + return err; + } + + sk = icmpv6_socket->sk; + sk->allocation = GFP_ATOMIC; + sk->num = 256; /* Don't receive any data */ + + inet6_add_protocol(&icmpv6_protocol); + + return 0; +} + +void icmpv6_cleanup(void) +{ + sock_release(icmpv6_socket); + icmpv6_socket = NULL; /* For safety. 
*/ + inet6_del_protocol(&icmpv6_protocol); +} + +static struct icmp6_err { + int err; + int fatal; +} tab_unreach[] = { + { ENETUNREACH, 0}, /* NOROUTE */ + { EACCES, 1}, /* ADM_PROHIBITED */ + { EHOSTUNREACH, 0}, /* Was NOT_NEIGHBOUR, now reserved */ + { EHOSTUNREACH, 0}, /* ADDR_UNREACH */ + { ECONNREFUSED, 1}, /* PORT_UNREACH */ +}; + +int icmpv6_err_convert(int type, int code, int *err) +{ + int fatal = 0; + + *err = EPROTO; + + switch (type) { + case ICMPV6_DEST_UNREACH: + fatal = 1; + if (code <= ICMPV6_PORT_UNREACH) { + *err = tab_unreach[code].err; + fatal = tab_unreach[code].fatal; + } + break; + + case ICMPV6_PKT_TOOBIG: + *err = EMSGSIZE; + break; + + case ICMPV6_PARAMPROB: + *err = EPROTO; + fatal = 1; + break; + + case ICMPV6_TIME_EXCEED: + *err = EHOSTUNREACH; + break; + }; + + return fatal; +} diff --git a/pfinet/linux-src/net/ipv6/ip6_fib.c b/pfinet/linux-src/net/ipv6/ip6_fib.c new file mode 100644 index 00000000..69735248 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ip6_fib.c @@ -0,0 +1,1205 @@ +/* + * Linux INET6 implementation + * Forwarding Information Database + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: ip6_fib.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/net.h> +#include <linux/route.h> +#include <linux/netdevice.h> +#include <linux/in6.h> + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +#endif + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> + +#include <net/ip6_fib.h> +#include <net/ip6_route.h> + +#define RT6_DEBUG 2 +#undef CONFIG_IPV6_SUBTREES + +#if RT6_DEBUG >= 1 +#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } }) +#else +#define BUG_TRAP(x) do { ; } while (0) +#endif + +#if RT6_DEBUG >= 3 +#define RT6_TRACE(x...) printk(KERN_DEBUG x) +#else +#define RT6_TRACE(x...) do { ; } while (0) +#endif + +struct rt6_statistics rt6_stats; + +enum fib_walk_state_t +{ +#ifdef CONFIG_IPV6_SUBTREES + FWS_S, +#endif + FWS_L, + FWS_R, + FWS_C, + FWS_U +}; + +struct fib6_cleaner_t +{ + struct fib6_walker_t w; + int (*func)(struct rt6_info *, void *arg); + void *arg; +}; + +#ifdef CONFIG_IPV6_SUBTREES +#define FWS_INIT FWS_S +#define SUBTREE(fn) ((fn)->subtree) +#else +#define FWS_INIT FWS_L +#define SUBTREE(fn) NULL +#endif + +static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); +static void fib6_repair_tree(struct fib6_node *fn); + +/* + * A routing update causes an increase of the serial number on the + * afected subtree. This allows for cached routes to be asynchronously + * tested when modifications are made to the destination cache as a + * result of redirects, path MTU changes, etc. 
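+ *
+ * In outline (a sketch, not literal code from this file): a cached
+ * route keeps the fn_sernum of its tree node as a cookie, and the
+ * dst cache revalidates it with a check of the form
+ *
+ *	if (rt->rt6i_node == NULL || rt->rt6i_node->fn_sernum != cookie)
+ *		...		(stale: fall back to a full lookup)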
+ */ + +static __u32 rt_sernum = 0; + +static struct timer_list ip6_fib_timer = { + NULL, NULL, + 0, + ~0UL, + fib6_run_gc +}; + +static struct fib6_walker_t fib6_walker_list = { + &fib6_walker_list, &fib6_walker_list, +}; + +#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) + +static __inline__ u32 fib6_new_sernum(void) +{ + u32 n = ++rt_sernum; + if ((__s32)n <= 0) + rt_sernum = n = 1; + return n; +} + +/* + * Auxiliary address test functions for the radix tree. + * + * These assume a 32bit processor (although it will work on + * 64bit processors) + */ + +/* + * compare "prefix length" bits of an address + */ + +static __inline__ int addr_match(void *token1, void *token2, int prefixlen) +{ + __u32 *a1 = token1; + __u32 *a2 = token2; + int pdw; + int pbi; + + pdw = prefixlen >> 5; /* num of whole __u32 in prefix */ + pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ + + if (pdw) + if (memcmp(a1, a2, pdw << 2)) + return 0; + + if (pbi) { + __u32 mask; + + mask = htonl((0xffffffff) << (32 - pbi)); + + if ((a1[pdw] ^ a2[pdw]) & mask) + return 0; + } + + return 1; +} + +/* + * test bit + */ + +static __inline__ int addr_bit_set(void *token, int fn_bit) +{ + __u32 *addr = token; + + return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; +} + +/* + * find the first different bit between two addresses + * length of address must be a multiple of 32bits + */ + +static __inline__ int addr_diff(void *token1, void *token2, int addrlen) +{ + __u32 *a1 = token1; + __u32 *a2 = token2; + int i; + + addrlen >>= 2; + + for (i = 0; i < addrlen; i++) { + __u32 xb; + + xb = a1[i] ^ a2[i]; + + if (xb) { + int j = 31; + + xb = ntohl(xb); + + while (test_bit(j, &xb) == 0) + j--; + + return (i * 32 + 31 - j); + } + } + + /* + * we should *never* get to this point since that + * would mean the addrs are equal + * + * However, we do get to it 8) And exacly, when + * addresses are equal 8) + * + * ip route add 1111::/128 via ... + * ip route add 1111::/64 via ... + * and we are here. + * + * Ideally, this function should stop comparison + * at prefix length. It does not, but it is still OK, + * if returned value is greater than prefix length. + * --ANK (980803) + */ + + return addrlen<<5; +} + +static __inline__ struct fib6_node * node_alloc(void) +{ + struct fib6_node *fn; + + if ((fn = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC)) != NULL) { + memset(fn, 0, sizeof(struct fib6_node)); + rt6_stats.fib_nodes++; + } + + return fn; +} + +static __inline__ void node_free(struct fib6_node * fn) +{ + rt6_stats.fib_nodes--; + kfree(fn); +} + +static __inline__ void rt6_release(struct rt6_info *rt) +{ + if (atomic_dec_and_test(&rt->rt6i_ref)) + dst_free(&rt->u.dst); +} + + +/* + * Routing Table + * + * return the apropriate node for a routing tree "add" operation + * by either creating and inserting or by returning an existing + * node. + */ + +static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, + int addrlen, int plen, + int offset) +{ + struct fib6_node *fn, *in, *ln; + struct fib6_node *pn = NULL; + struct rt6key *key; + int bit; + int dir = 0; + __u32 sernum = fib6_new_sernum(); + + RT6_TRACE("fib6_add_1\n"); + + /* insert node in tree */ + + fn = root; + + if (plen == 0) + return fn; + + do { + key = (struct rt6key *)((u8 *)fn->leaf + offset); + + /* + * Prefix match + */ + if (plen < fn->fn_bit || + !addr_match(&key->addr, addr, fn->fn_bit)) + goto insert_above; + + /* + * Exact match ? 
+ */ + + if (plen == fn->fn_bit) { + /* clean up an intermediate node */ + if ((fn->fn_flags & RTN_RTINFO) == 0) { + rt6_release(fn->leaf); + fn->leaf = NULL; + } + + fn->fn_sernum = sernum; + + return fn; + } + + /* + * We have more bits to go + */ + + /* Try to walk down on tree. */ + fn->fn_sernum = sernum; + dir = addr_bit_set(addr, fn->fn_bit); + pn = fn; + fn = dir ? fn->right: fn->left; + } while (fn); + + /* + * We wlaked to the bottom of tree. + * Create new leaf node without children. + */ + + ln = node_alloc(); + + if (ln == NULL) + return NULL; + ln->fn_bit = plen; + + ln->parent = pn; + ln->fn_sernum = sernum; + + if (dir) + pn->right = ln; + else + pn->left = ln; + + return ln; + + +insert_above: + /* + * split since we don't have a common prefix anymore or + * we have a less significant route. + * we've to insert an intermediate node on the list + * this new node will point to the one we need to create + * and the current + */ + + pn = fn->parent; + + /* find 1st bit in difference between the 2 addrs. + + See comment in addr_diff: bit may be an invalid value, + but if it is >= plen, the value is ignored in any case. + */ + + bit = addr_diff(addr, &key->addr, addrlen); + + /* + * (intermediate)[in] + * / \ + * (new leaf node)[ln] (old node)[fn] + */ + if (plen > bit) { + in = node_alloc(); + ln = node_alloc(); + + if (in == NULL || ln == NULL) { + if (in) + node_free(in); + if (ln) + node_free(ln); + return NULL; + } + + /* + * new intermediate node. + * RTN_RTINFO will + * be off since that an address that chooses one of + * the branches would not match less specific routes + * in the other branch + */ + + in->fn_bit = bit; + + in->parent = pn; + in->leaf = fn->leaf; + atomic_inc(&in->leaf->rt6i_ref); + + in->fn_sernum = sernum; + + /* update parent pointer */ + if (dir) + pn->right = in; + else + pn->left = in; + + ln->fn_bit = plen; + + ln->parent = in; + fn->parent = in; + + ln->fn_sernum = sernum; + + if (addr_bit_set(addr, bit)) { + in->right = ln; + in->left = fn; + } else { + in->left = ln; + in->right = fn; + } + } else { /* plen <= bit */ + + /* + * (new leaf node)[ln] + * / \ + * (old node)[fn] NULL + */ + + ln = node_alloc(); + + if (ln == NULL) + return NULL; + + ln->fn_bit = plen; + + ln->parent = pn; + + ln->fn_sernum = sernum; + + if (dir) + pn->right = ln; + else + pn->left = ln; + + if (addr_bit_set(&key->addr, plen)) + ln->right = fn; + else + ln->left = fn; + + fn->parent = ln; + } + return ln; +} + +/* + * Insert routing information in a node. 
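+ *
+ * The leaf chain of a node is kept sorted by rt6i_metric: a new route
+ * is linked in before the first entry with a larger metric, e.g.
+ *
+ *	fn->leaf -> {metric 1} -> {metric 256} -> {metric 1024}
+ *
+ * and a route matching an existing entry in metric, device, gateway
+ * and flow rule merely refreshes that entry's expiry (-EEXIST).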
+ */ + +static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt) +{ + struct rt6_info *iter = NULL; + struct rt6_info **ins; + + ins = &fn->leaf; + + for (iter = fn->leaf; iter; iter=iter->u.next) { + /* + * Search for duplicates + */ + + if (iter->rt6i_metric == rt->rt6i_metric) { + /* + * Same priority level + */ + + if ((iter->rt6i_dev == rt->rt6i_dev) && + (iter->rt6i_flowr == rt->rt6i_flowr) && + (ipv6_addr_cmp(&iter->rt6i_gateway, + &rt->rt6i_gateway) == 0)) { + if (!(iter->rt6i_flags&RTF_EXPIRES)) + return -EEXIST; + iter->rt6i_expires = rt->rt6i_expires; + if (!(rt->rt6i_flags&RTF_EXPIRES)) { + iter->rt6i_flags &= ~RTF_EXPIRES; + iter->rt6i_expires = 0; + } + return -EEXIST; + } + } + + if (iter->rt6i_metric > rt->rt6i_metric) + break; + + ins = &iter->u.next; + } + + /* + * insert node + */ + + rt->u.next = iter; + *ins = rt; + rt->rt6i_node = fn; + atomic_inc(&rt->rt6i_ref); +#ifdef CONFIG_RTNETLINK + inet6_rt_notify(RTM_NEWROUTE, rt); +#endif + rt6_stats.fib_rt_entries++; + + if ((fn->fn_flags & RTN_RTINFO) == 0) { + rt6_stats.fib_route_nodes++; + fn->fn_flags |= RTN_RTINFO; + } + + return 0; +} + +static __inline__ void fib6_start_gc(struct rt6_info *rt) +{ + if (ip6_fib_timer.expires == 0 && + (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) { + del_timer(&ip6_fib_timer); + ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval; + add_timer(&ip6_fib_timer); + } +} + +/* + * Add routing information to the routing tree. + * <destination addr>/<source addr> + * with source addr info in sub-trees + */ + +int fib6_add(struct fib6_node *root, struct rt6_info *rt) +{ + struct fib6_node *fn; + int err = -ENOMEM; + + fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), + rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt); + + if (fn == NULL) + return -ENOMEM; + +#ifdef CONFIG_IPV6_SUBTREES + if (rt->rt6i_src.plen) { + struct fib6_node *sn; + + if (fn->subtree == NULL) { + struct fib6_node *sfn; + + /* + * Create subtree. + * + * fn[main tree] + * | + * sfn[subtree root] + * \ + * sn[new leaf node] + */ + + /* Create subtree root node */ + sfn = node_alloc(); + if (sfn == NULL) + goto st_failure; + + sfn->leaf = &ip6_null_entry; + atomic_inc(&ip6_null_entry.rt6i_ref); + sfn->fn_flags = RTN_ROOT; + sfn->fn_sernum = fib6_new_sernum(); + + /* Now add the first leaf node to new subtree */ + + sn = fib6_add_1(sfn, &rt->rt6i_src.addr, + sizeof(struct in6_addr), rt->rt6i_src.plen, + (u8*) &rt->rt6i_src - (u8*) rt); + + if (sn == NULL) { + /* If it is failed, discard just allocated + root, and then (in st_failure) stale node + in main tree. + */ + node_free(sfn); + goto st_failure; + } + + /* Now link new subtree to main tree */ + sfn->parent = fn; + fn->subtree = sfn; + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } + } else { + sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, + sizeof(struct in6_addr), rt->rt6i_src.plen, + (u8*) &rt->rt6i_src - (u8*) rt); + + if (sn == NULL) + goto st_failure; + } + + fn = sn; + } +#endif + + err = fib6_add_rt2node(fn, rt); + + if (err == 0) { + fib6_start_gc(rt); + if (!(rt->rt6i_flags&RTF_CACHE)) + fib6_prune_clones(fn, rt); + } + + if (err) + dst_free(&rt->u.dst); + return err; + +#ifdef CONFIG_IPV6_SUBTREES + /* Subtree creation failed, probably main tree node + is orphan. If it is, shot it. 
+ */
+st_failure:
+	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+		fib6_repair_tree(fn);
+	dst_free(&rt->u.dst);
+	return err;
+#endif
+}
+
+/*
+ *	Routing tree lookup
+ *
+ */
+
+struct lookup_args {
+	int		offset;		/* key offset on rt6_info	*/
+	struct in6_addr	*addr;		/* search key			*/
+};
+
+static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
+					struct lookup_args *args)
+{
+	struct fib6_node *fn;
+	int dir;
+
+	/*
+	 *	Descend on a tree
+	 */
+
+	fn = root;
+
+	for (;;) {
+		struct fib6_node *next;
+
+		dir = addr_bit_set(args->addr, fn->fn_bit);
+
+		next = dir ? fn->right : fn->left;
+
+		if (next) {
+			fn = next;
+			continue;
+		}
+
+		break;
+	}
+
+	while ((fn->fn_flags & RTN_ROOT) == 0) {
+#ifdef CONFIG_IPV6_SUBTREES
+		if (fn->subtree) {
+			struct fib6_node *st;
+			struct lookup_args *narg;
+
+			narg = args + 1;
+
+			if (narg->addr) {
+				st = fib6_lookup_1(fn->subtree, narg);
+
+				if (!(st->fn_flags & RTN_ROOT))
+					return st;
+			}
+		}
+#endif
+
+		if (fn->fn_flags & RTN_RTINFO) {
+			struct rt6key *key;
+
+			key = (struct rt6key *) ((u8 *) fn->leaf +
+						 args->offset);
+
+			if (addr_match(&key->addr, args->addr, key->plen))
+				return fn;
+		}
+
+		fn = fn->parent;
+	}
+
+	return NULL;
+}
+
+struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
+			       struct in6_addr *saddr)
+{
+	struct lookup_args args[2];
+	struct rt6_info *rt = NULL;
+	struct fib6_node *fn;
+
+	args[0].offset = (u8*) &rt->rt6i_dst - (u8*) rt;
+	args[0].addr = daddr;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	args[1].offset = (u8*) &rt->rt6i_src - (u8*) rt;
+	args[1].addr = saddr;
+#endif
+
+	fn = fib6_lookup_1(root, args);
+
+	if (fn == NULL)
+		fn = root;
+
+	return fn;
+}
+
+/*
+ *	Get node with specified destination prefix (and source prefix,
+ *	if subtrees are used)
+ */
+
+
+static struct fib6_node * fib6_locate_1(struct fib6_node *root,
+					struct in6_addr *addr,
+					int plen, int offset)
+{
+	struct fib6_node *fn;
+
+	for (fn = root; fn; ) {
+		struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+		/*
+		 *	Prefix match
+		 */
+		if (plen < fn->fn_bit ||
+		    !addr_match(&key->addr, addr, fn->fn_bit))
+			return NULL;
+
+		if (plen == fn->fn_bit)
+			return fn;
+
+		/*
+		 *	We have more bits to go
+		 */
+		if (addr_bit_set(addr, fn->fn_bit))
+			fn = fn->right;
+		else
+			fn = fn->left;
+	}
+	return NULL;
+}
+
+struct fib6_node * fib6_locate(struct fib6_node *root,
+			       struct in6_addr *daddr, int dst_len,
+			       struct in6_addr *saddr, int src_len)
+{
+	struct rt6_info *rt = NULL;
+	struct fib6_node *fn;
+
+	fn = fib6_locate_1(root, daddr, dst_len,
+			   (u8*) &rt->rt6i_dst - (u8*) rt);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (src_len) {
+		BUG_TRAP(saddr!=NULL);
+		if (fn)
+			fn = fn->subtree;
+		if (fn)
+			fn = fib6_locate_1(fn, saddr, src_len,
+					   (u8*) &rt->rt6i_src - (u8*) rt);
+	}
+#endif
+
+	if (fn && fn->fn_flags&RTN_RTINFO)
+		return fn;
+
+	return NULL;
+}
+
+
+/*
+ *	Deletion
+ *
+ */
+
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
+{
+	if (fn->fn_flags&RTN_ROOT)
+		return &ip6_null_entry;
+
+	while (fn) {
+		if (fn->left)
+			return fn->left->leaf;
+
+		if (fn->right)
+			return fn->right->leaf;
+
+		fn = SUBTREE(fn);
+	}
+	return NULL;
+}
+
+/*
+ *	Called to trim the tree of intermediate nodes when possible. "fn"
+ *	is the node we want to try and remove.
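+ *
+ *	The loop climbs towards the root: a node can be unlinked only
+ *	while it has at most one child and no subtree, and any tree
+ *	walkers parked on a removed node are repointed to its parent
+ *	or to the surviving child.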
+ */ + +static void fib6_repair_tree(struct fib6_node *fn) +{ + int children; + int nstate; + struct fib6_node *child, *pn; + struct fib6_walker_t *w; + int iter = 0; + + for (;;) { + RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); + iter++; + + BUG_TRAP(!(fn->fn_flags&RTN_RTINFO)); + BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT)); + BUG_TRAP(fn->leaf==NULL); + + children = 0; + child = NULL; + if (fn->right) child = fn->right, children |= 1; + if (fn->left) child = fn->left, children |= 2; + + if (children == 3 || SUBTREE(fn) +#ifdef CONFIG_IPV6_SUBTREES + /* Subtree root (i.e. fn) may have one child */ + || (children && fn->fn_flags&RTN_ROOT) +#endif + ) { + fn->leaf = fib6_find_prefix(fn); +#if RT6_DEBUG >= 2 + if (fn->leaf==NULL) { + BUG_TRAP(fn->leaf); + fn->leaf = &ip6_null_entry; + } +#endif + atomic_inc(&fn->leaf->rt6i_ref); + return; + } + + pn = fn->parent; +#ifdef CONFIG_IPV6_SUBTREES + if (SUBTREE(pn) == fn) { + BUG_TRAP(fn->fn_flags&RTN_ROOT); + SUBTREE(pn) = NULL; + nstate = FWS_L; + } else { + BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); +#endif + if (pn->right == fn) pn->right = child; + else if (pn->left == fn) pn->left = child; +#if RT6_DEBUG >= 2 + else BUG_TRAP(0); +#endif + if (child) + child->parent = pn; + nstate = FWS_R; +#ifdef CONFIG_IPV6_SUBTREES + } +#endif + + FOR_WALKERS(w) { + if (child == NULL) { + if (w->root == fn) { + w->root = w->node = NULL; + RT6_TRACE("W %p adjusted by delroot 1\n", w); + } else if (w->node == fn) { + RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); + w->node = pn; + w->state = nstate; + } + } else { + if (w->root == fn) { + w->root = child; + RT6_TRACE("W %p adjusted by delroot 2\n", w); + } + if (w->node == fn) { + w->node = child; + if (children&2) { + RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); + w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; + } else { + RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); + w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; + } + } + } + } + + node_free(fn); + if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) + return; + + rt6_release(pn->leaf); + pn->leaf = NULL; + fn = pn; + } +} + +static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp) +{ + struct fib6_walker_t *w; + struct rt6_info *rt = *rtp; + + RT6_TRACE("fib6_del_route\n"); + + /* Unlink it */ + *rtp = rt->u.next; + rt->rt6i_node = NULL; + rt6_stats.fib_rt_entries--; + + /* Adjust walkers */ + FOR_WALKERS(w) { + if (w->state == FWS_C && w->leaf == rt) { + RT6_TRACE("walker %p adjusted by delroute\n", w); + w->leaf = rt->u.next; + if (w->leaf == NULL) + w->state = FWS_U; + } + } + + rt->u.next = NULL; + + /* If it was last route, expunge its radix tree node */ + if (fn->leaf == NULL) { + fn->fn_flags &= ~RTN_RTINFO; + rt6_stats.fib_route_nodes--; + fib6_repair_tree(fn); + } + +#ifdef CONFIG_RTNETLINK + inet6_rt_notify(RTM_DELROUTE, rt); +#endif + rt6_release(rt); +} + +int fib6_del(struct rt6_info *rt) +{ + struct fib6_node *fn = rt->rt6i_node; + struct rt6_info **rtp; + +#if RT6_DEBUG >= 2 + if (rt->u.dst.obsolete>0) { + BUG_TRAP(rt->u.dst.obsolete>0); + return -EFAULT; + } +#endif + if (fn == NULL || rt == &ip6_null_entry) + return -ENOENT; + + BUG_TRAP(fn->fn_flags&RTN_RTINFO); + + if (!(rt->rt6i_flags&RTF_CACHE)) + fib6_prune_clones(fn, rt); + + /* + * Walk the leaf entries looking for ourself + */ + + for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { + if (*rtp == rt) { + fib6_del_route(fn, rtp); + return 0; + } + } + return -ENOENT; +} + +/* + * Tree transversal function. 
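+ *	(The walk is driven by a per-walker state machine: FWS_INIT
+ *	aliases FWS_S with subtrees, FWS_L without; FWS_L/FWS_R descend
+ *	to the left/right child, FWS_C scans the leaf list of the
+ *	current node and FWS_U climbs back up.)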
+ * + * Certainly, it is not interrupt safe. + * However, it is internally reenterable wrt itself and fib6_add/fib6_del. + * It means, that we can modify tree during walking + * and use this function for garbage collection, clone pruning, + * cleaning tree when a device goes down etc. etc. + * + * It guarantees that every node will be traversed, + * and that it will be traversed only once. + * + * Callback function w->func may return: + * 0 -> continue walking. + * positive value -> walking is suspended (used by tree dumps, + * and probably by gc, if it will be split to several slices) + * negative value -> terminate walking. + * + * The function itself returns: + * 0 -> walk is complete. + * >0 -> walk is incomplete (i.e. suspended) + * <0 -> walk is terminated by an error. + */ + +int fib6_walk_continue(struct fib6_walker_t *w) +{ + struct fib6_node *fn, *pn; + + for (;;) { + fn = w->node; + if (fn == NULL) + return 0; + + if (w->prune && fn != w->root && + fn->fn_flags&RTN_RTINFO && w->state < FWS_C) { + w->state = FWS_C; + w->leaf = fn->leaf; + } + switch (w->state) { +#ifdef CONFIG_IPV6_SUBTREES + case FWS_S: + if (SUBTREE(fn)) { + w->node = SUBTREE(fn); + continue; + } + w->state = FWS_L; +#endif + case FWS_L: + if (fn->left) { + w->node = fn->left; + w->state = FWS_INIT; + continue; + } + w->state = FWS_R; + case FWS_R: + if (fn->right) { + w->node = fn->right; + w->state = FWS_INIT; + continue; + } + w->state = FWS_C; + w->leaf = fn->leaf; + case FWS_C: + if (w->leaf && fn->fn_flags&RTN_RTINFO) { + int err = w->func(w); + if (err) + return err; + continue; + } + w->state = FWS_U; + case FWS_U: + if (fn == w->root) + return 0; + pn = fn->parent; + w->node = pn; +#ifdef CONFIG_IPV6_SUBTREES + if (SUBTREE(pn) == fn) { + BUG_TRAP(fn->fn_flags&RTN_ROOT); + w->state = FWS_L; + continue; + } +#endif + if (pn->left == fn) { + w->state = FWS_R; + continue; + } + if (pn->right == fn) { + w->state = FWS_C; + w->leaf = w->node->leaf; + continue; + } +#if RT6_DEBUG >= 2 + BUG_TRAP(0); +#endif + } + } +} + +int fib6_walk(struct fib6_walker_t *w) +{ + int res; + + w->state = FWS_INIT; + w->node = w->root; + + fib6_walker_link(w); + res = fib6_walk_continue(w); + if (res <= 0) + fib6_walker_unlink(w); + return res; +} + +static int fib6_clean_node(struct fib6_walker_t *w) +{ + int res; + struct rt6_info *rt; + struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; + + for (rt = w->leaf; rt; rt = rt->u.next) { + res = c->func(rt, c->arg); + if (res < 0) { + w->leaf = rt; + res = fib6_del(rt); + if (res) { +#if RT6_DEBUG >= 2 + printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); +#endif + continue; + } + return 0; + } + BUG_TRAP(res==0); + } + w->leaf = rt; + return 0; +} + +/* + * Convenient frontend to tree walker. + * + * func is called on each route. + * It may return -1 -> delete this route. + * 0 -> continue walking + * + * prune==1 -> only immediate children of node (certainly, + * ignoring pure split nodes) will be scanned. 
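+ *
+ *	Unlike the raw walker, func may not suspend the walk here:
+ *	fib6_clean_node() understands only a negative result (delete
+ *	this route) or 0 (continue).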
+ */ + +void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) +{ + struct fib6_cleaner_t c; + + c.w.root = root; + c.w.func = fib6_clean_node; + c.w.prune = prune; + c.func = func; + c.arg = arg; + + start_bh_atomic(); + fib6_walk(&c.w); + end_bh_atomic(); +} + +static int fib6_prune_clone(struct rt6_info *rt, void *arg) +{ + if (rt->rt6i_flags & RTF_CACHE) { + RT6_TRACE("pruning clone %p\n", rt); + return -1; + } + + return 0; +} + +static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt) +{ + fib6_clean_tree(fn, fib6_prune_clone, 1, rt); +} + +/* + * Garbage collection + */ + +static struct fib6_gc_args +{ + int timeout; + int more; +} gc_args; + +static int fib6_age(struct rt6_info *rt, void *arg) +{ + unsigned long now = jiffies; + + /* Age clones. Note, that clones are aged out + only if they are not in use now. + */ + + if (rt->rt6i_flags & RTF_CACHE) { + if (atomic_read(&rt->u.dst.use) == 0 && + (long)(now - rt->u.dst.lastuse) >= gc_args.timeout) { + RT6_TRACE("aging clone %p\n", rt); + return -1; + } + gc_args.more++; + } + + /* + * check addrconf expiration here. + * They are expired even if they are in use. + */ + + if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { + if ((long)(now - rt->rt6i_expires) > 0) { + RT6_TRACE("expiring %p\n", rt); + return -1; + } + gc_args.more++; + } + + return 0; +} + +void fib6_run_gc(unsigned long dummy) +{ + if (dummy != ~0UL) + gc_args.timeout = (int)dummy; + else + gc_args.timeout = ip6_rt_gc_interval; + + gc_args.more = 0; + + fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); + + del_timer(&ip6_fib_timer); + + ip6_fib_timer.expires = 0; + if (gc_args.more) { + ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval; + add_timer(&ip6_fib_timer); + } +} + +#ifdef MODULE +void fib6_gc_cleanup(void) +{ + del_timer(&ip6_fib_timer); +} +#endif + + diff --git a/pfinet/linux-src/net/ipv6/ip6_flowlabel.c b/pfinet/linux-src/net/ipv6/ip6_flowlabel.c new file mode 100644 index 00000000..9aa60db4 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ip6_flowlabel.c @@ -0,0 +1,620 @@ +/* + * ip6_flowlabel.c IPv6 flowlabel manager. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/in6.h> +#include <linux/route.h> +#include <linux/proc_fs.h> + +#include <net/sock.h> + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/protocol.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/rawv6.h> +#include <net/icmp.h> +#include <net/transp_v6.h> + +#include <asm/uaccess.h> + +#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified + in old IPv6 RFC. Well, it was reasonable value. 
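+					   Both linger bounds are in
+					   seconds; check_linger()
+					   converts them to jiffies by
+					   multiplying with HZ.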
+ */ +#define FL_MAX_LINGER 60 /* Maximal linger timeout */ + +/* FL hash table */ + +#define FL_MAX_PER_SOCK 32 +#define FL_MAX_SIZE 4096 +#define FL_HASH_MASK 255 +#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) + +static atomic_t fl_size = ATOMIC_INIT(0); +static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; + +static struct timer_list ip6_fl_gc_timer; + +/* FL hash table lock: it protects only of GC */ + +static atomic_t ip6_fl_lock = ATOMIC_INIT(0); + +static __inline__ void fl_lock(void) +{ + atomic_inc(&ip6_fl_lock); + synchronize_bh(); +} + +static __inline__ void fl_unlock(void) +{ + atomic_dec(&ip6_fl_lock); +} + +static struct ip6_flowlabel * fl_lookup(u32 label) +{ + struct ip6_flowlabel *fl; + + fl_lock(); + for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + if (fl->label == label) { + atomic_inc(&fl->users); + break; + } + } + fl_unlock(); + return fl; +} + +static void fl_free(struct ip6_flowlabel *fl) +{ + if (fl->opt) + kfree(fl->opt); + kfree(fl); +} + +static void fl_release(struct ip6_flowlabel *fl) +{ + fl_lock(); + fl->lastuse = jiffies; + if (atomic_dec_and_test(&fl->users)) { + unsigned long ttd = fl->lastuse + fl->linger; + if ((long)(ttd - fl->expires) > 0) + fl->expires = ttd; + ttd = fl->expires; + if (fl->opt && fl->share == IPV6_FL_S_EXCL) { + struct ipv6_txoptions *opt = fl->opt; + fl->opt = NULL; + kfree(opt); + } + if (!del_timer(&ip6_fl_gc_timer) || + (long)(ip6_fl_gc_timer.expires - ttd) > 0) + ip6_fl_gc_timer.expires = ttd; + add_timer(&ip6_fl_gc_timer); + } + fl_unlock(); +} + +static void ip6_fl_gc(unsigned long dummy) +{ + int i; + unsigned long now = jiffies; + unsigned long sched = 0; + + if (atomic_read(&ip6_fl_lock)) { + ip6_fl_gc_timer.expires = now + HZ/10; + add_timer(&ip6_fl_gc_timer); + return; + } + + for (i=0; i<=FL_HASH_MASK; i++) { + struct ip6_flowlabel *fl, **flp; + flp = &fl_ht[i]; + while ((fl=*flp) != NULL) { + if (atomic_read(&fl->users) == 0) { + unsigned long ttd = fl->lastuse + fl->linger; + if ((long)(ttd - fl->expires) > 0) + fl->expires = ttd; + ttd = fl->expires; + if ((long)(now - ttd) >= 0) { + *flp = fl->next; + fl_free(fl); + atomic_dec(&fl_size); + continue; + } + if (!sched || (long)(ttd - sched) < 0) + sched = ttd; + } + flp = &fl->next; + } + } + if (!sched && atomic_read(&fl_size)) + sched = now + FL_MAX_LINGER; + if (sched) { + ip6_fl_gc_timer.expires = sched; + add_timer(&ip6_fl_gc_timer); + } +} + +static int fl_intern(struct ip6_flowlabel *fl, __u32 label) +{ + fl->label = label & IPV6_FLOWLABEL_MASK; + + fl_lock(); + if (label == 0) { + for (;;) { + fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; + if (fl->label) { + struct ip6_flowlabel *lfl; + lfl = fl_lookup(fl->label); + if (lfl == NULL) + break; + fl_release(lfl); + } + } + } + + fl->lastuse = jiffies; + fl->next = fl_ht[FL_HASH(fl->label)]; + fl_ht[FL_HASH(fl->label)] = fl; + atomic_inc(&fl_size); + fl_unlock(); + return 0; +} + + + +/* Socket flowlabel lists */ + +struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label) +{ + struct ipv6_fl_socklist *sfl; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + label &= IPV6_FLOWLABEL_MASK; + + for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + struct ip6_flowlabel *fl = sfl->fl; + if (fl->label == label) { + fl->lastuse = jiffies; + atomic_inc(&fl->users); + return fl; + } + } + return NULL; +} + +void fl6_free_socklist(struct sock *sk) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_fl_socklist *sfl; + + while ((sfl = np->ipv6_fl_list) != NULL) { + np->ipv6_fl_list = 
sfl->next; + fl_release(sfl->fl); + kfree(sfl); + } +} + +/* Service routines */ + + +/* + It is the only difficult place. flowlabel enforces equal headers + before and including routing header, however user may supply options + following rthdr. + */ + +struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, + struct ip6_flowlabel * fl, + struct ipv6_txoptions * fopt) +{ + struct ipv6_txoptions * fl_opt = fl->opt; + + if (fopt == NULL || fopt->opt_flen == 0) + return fl_opt; + + if (fl_opt != NULL) { + opt_space->hopopt = fl_opt->hopopt; + opt_space->dst0opt = fl_opt->dst0opt; + opt_space->srcrt = fl_opt->srcrt; + opt_space->opt_nflen = fl_opt->opt_nflen; + } else { + if (fopt->opt_nflen == 0) + return fopt; + opt_space->hopopt = NULL; + opt_space->dst0opt = NULL; + opt_space->srcrt = NULL; + opt_space->opt_nflen = 0; + } + opt_space->dst1opt = fopt->dst1opt; + opt_space->auth = fopt->auth; + opt_space->opt_flen = fopt->opt_flen; + return opt_space; +} + +static __u32 check_linger(__u16 ttl) +{ + if (ttl < FL_MIN_LINGER) + return FL_MIN_LINGER*HZ; + if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN)) + return 0; + return ttl*HZ; +} + +static int fl6_renew(struct ip6_flowlabel *fl, unsigned linger, unsigned expires) +{ + linger = check_linger(linger); + if (!linger) + return -EPERM; + expires = check_linger(expires); + if (!expires) + return -EPERM; + fl->lastuse = jiffies; + if (fl->linger < linger) + fl->linger = linger; + if (expires < fl->linger) + expires = fl->linger; + if ((long)(fl->expires - (fl->lastuse+expires)) < 0) + fl->expires = fl->lastuse + expires; + return 0; +} + +static struct ip6_flowlabel * +fl_create(struct in6_flowlabel_req *freq, char *optval, int optlen, int *err_p) +{ + struct ip6_flowlabel *fl; + int olen; + int addr_type; + int err; + + err = -ENOMEM; + fl = kmalloc(sizeof(*fl), GFP_KERNEL); + if (fl == NULL) + goto done; + memset(fl, 0, sizeof(*fl)); + + olen = optlen - CMSG_ALIGN(sizeof(*freq)); + if (olen > 0) { + struct msghdr msg; + struct flowi flowi; + int junk; + + err = -ENOMEM; + fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); + if (fl->opt == NULL) + goto done; + + memset(fl->opt, 0, sizeof(*fl->opt)); + fl->opt->tot_len = sizeof(*fl->opt) + olen; + err = -EFAULT; + if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen)) + goto done; + + msg.msg_controllen = olen; + msg.msg_control = (void*)(fl->opt+1); + flowi.oif = 0; + + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk); + if (err) + goto done; + err = -EINVAL; + if (fl->opt->opt_flen) + goto done; + if (fl->opt->opt_nflen == 0) { + kfree(fl->opt); + fl->opt = NULL; + } + } + + fl->expires = jiffies; + err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); + if (err) + goto done; + fl->share = freq->flr_share; + addr_type = ipv6_addr_type(&freq->flr_dst); + if ((addr_type&IPV6_ADDR_MAPPED) + || addr_type == IPV6_ADDR_ANY) + goto done; + ipv6_addr_copy(&fl->dst, &freq->flr_dst); + atomic_set(&fl->users, 1); + switch (fl->share) { + case IPV6_FL_S_EXCL: + case IPV6_FL_S_ANY: + break; + case IPV6_FL_S_PROCESS: + fl->owner = current->pid; + break; + case IPV6_FL_S_USER: + fl->owner = current->euid; + break; + default: + err = -EINVAL; + goto done; + } + return fl; + +done: + if (fl) + fl_free(fl); + *err_p = err; + return NULL; +} + +static int mem_check(struct sock *sk) +{ + struct ipv6_fl_socklist *sfl; + int room = FL_MAX_SIZE - atomic_read(&fl_size); + int count = 0; + + if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) + return 0; + + for (sfl = 
sk->net_pinfo.af_inet6.ipv6_fl_list; sfl; sfl = sfl->next) + count++; + + if (room <= 0 || + ((count >= FL_MAX_PER_SOCK || + (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) + && !capable(CAP_NET_ADMIN))) + return -ENOBUFS; + + return 0; +} + +static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) +{ + if (h1 == h2) + return 0; + if (h1 == NULL || h2 == NULL) + return 1; + if (h1->hdrlen != h2->hdrlen) + return 1; + return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1)); +} + +static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) +{ + if (o1 == o2) + return 0; + if (o1 == NULL || o2 == NULL) + return 1; + if (o1->opt_nflen != o2->opt_nflen) + return 1; + if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt)) + return 1; + if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt)) + return 1; + if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt)) + return 1; + return 0; +} + +int ipv6_flowlabel_opt(struct sock *sk, char *optval, int optlen) +{ + int err; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct in6_flowlabel_req freq; + struct ipv6_fl_socklist *sfl1=NULL; + struct ipv6_fl_socklist *sfl, **sflp; + struct ip6_flowlabel *fl; + + if (optlen < sizeof(freq)) + return -EINVAL; + + if (copy_from_user(&freq, optval, sizeof(freq))) + return -EFAULT; + + switch (freq.flr_action) { + case IPV6_FL_A_PUT: + for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + if (sfl->fl->label == freq.flr_label) { + if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) + np->flow_label &= ~IPV6_FLOWLABEL_MASK; + *sflp = sfl->next; + synchronize_bh(); + fl_release(sfl->fl); + kfree(sfl); + return 0; + } + } + return -ESRCH; + + case IPV6_FL_A_RENEW: + for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + if (sfl->fl->label == freq.flr_label) + return fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); + } + if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) { + fl = fl_lookup(freq.flr_label); + if (fl) { + err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); + fl_release(fl); + return err; + } + } + return -ESRCH; + + case IPV6_FL_A_GET: + if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) + return -EINVAL; + + fl = fl_create(&freq, optval, optlen, &err); + if (fl == NULL) + return err; + sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); + + if (freq.flr_label) { + struct ip6_flowlabel *fl1 = NULL; + + err = -EEXIST; + for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + if (sfl->fl->label == freq.flr_label) { + if (freq.flr_flags&IPV6_FL_F_EXCL) + goto done; + fl1 = sfl->fl; + atomic_inc(&fl->users); + break; + } + } + + if (fl1 == NULL) + fl1 = fl_lookup(freq.flr_label); + if (fl1) { + err = -EEXIST; + if (freq.flr_flags&IPV6_FL_F_EXCL) + goto release; + err = -EPERM; + if (fl1->share == IPV6_FL_S_EXCL || + fl1->share != fl->share || + fl1->owner != fl->owner) + goto release; + + err = -EINVAL; + if (ipv6_addr_cmp(&fl1->dst, &fl->dst) || + ipv6_opt_cmp(fl1->opt, fl->opt)) + goto release; + + err = -ENOMEM; + if (sfl1 == NULL) + goto release; + if (fl->linger > fl1->linger) + fl1->linger = fl->linger; + if ((long)(fl->expires - fl1->expires) > 0) + fl1->expires = fl->expires; + sfl1->fl = fl1; + sfl1->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl1; + synchronize_bh(); + fl_free(fl); + return 0; + +release: + fl_release(fl1); + goto done; + } + } + err = -ENOENT; + if (!(freq.flr_flags&IPV6_FL_F_CREATE)) + goto done; + + err = -ENOMEM; + if (sfl1 == NULL || (err = mem_check(sk)) != 0) + goto done; + + 
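+		/* Make the label globally visible: fl_intern() picks a
+		   random label if none was supplied and links the entry
+		   into the fl_ht hash, where fl_lookup() can find it. */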
err = fl_intern(fl, freq.flr_label); + if (err) + goto done; + + /* Do not check for fault */ + if (!freq.flr_label) + copy_to_user(optval + ((u8*)&freq.flr_label - (u8*)&freq), &fl->label, sizeof(fl->label)); + + sfl1->fl = fl; + sfl1->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl1; + return 0; + + default: + return -EINVAL; + } + +done: + if (fl) + fl_free(fl); + if (sfl1) + kfree(sfl1); + return err; +} + +#ifdef CONFIG_PROC_FS + + +static int ip6_fl_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0; + off_t begin=0; + int len=0; + int i, k; + struct ip6_flowlabel *fl; + + len+= sprintf(buffer,"Label S Owner Users Linger Expires " + "Dst Opt\n"); + + fl_lock(); + for (i=0; i<=FL_HASH_MASK; i++) { + for (fl = fl_ht[i]; fl; fl = fl->next) { + len+=sprintf(buffer+len,"%05X %-1d %-6d %-6d %-6d %-8ld ", + (unsigned)ntohl(fl->label), + fl->share, + (unsigned)fl->owner, + atomic_read(&fl->users), + fl->linger/HZ, + (long)(fl->expires - jiffies)/HZ); + + for (k=0; k<16; k++) + len+=sprintf(buffer+len, "%02x", fl->dst.s6_addr[k]); + buffer[len++]=' '; + len+=sprintf(buffer+len, "%-4d", fl->opt ? fl->opt->opt_nflen : 0); + buffer[len++]='\n'; + + pos=begin+len; + if(pos<offset) { + len=0; + begin=pos; + } + if(pos>offset+length) + goto done; + } + } + *eof = 1; + +done: + fl_unlock(); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if(len<0) + len=0; + return len; +} +#endif + + +void ip6_flowlabel_init() +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; +#endif + + init_timer(&ip6_fl_gc_timer); + ip6_fl_gc_timer.function = ip6_fl_gc; +#ifdef CONFIG_PROC_FS + ent = create_proc_entry("net/ip6_flowlabel", 0, 0); + ent->read_proc = ip6_fl_read_proc; +#endif +} + +void ip6_flowlabel_cleanup() +{ + del_timer(&ip6_fl_gc_timer); +#ifdef CONFIG_PROC_FS + remove_proc_entry("net/ip6_flowlabel", 0); +#endif +} diff --git a/pfinet/linux-src/net/ipv6/ip6_input.c b/pfinet/linux-src/net/ipv6/ip6_input.c new file mode 100644 index 00000000..54a3f455 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ip6_input.c @@ -0,0 +1,284 @@ +/* + * IPv6 input + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * Ian P. Morris <I.P.Morris@soton.ac.uk> + * + * $Id: ip6_input.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * Based in linux/net/ipv4/ip_input.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + + +int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop; + + ipv6_statistics.Ip6InReceives++; + + /* Store incoming device index. When the packet will + be queued, we cannot refer to skb->dev anymore. 
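+	   skb->cb is the only per-packet scratch space that survives
+	   queueing, so the incoming ifindex is parked there in
+	   struct inet6_skb_parm.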
+ */ + ((struct inet6_skb_parm *)skb->cb)->iif = dev->ifindex; + + hdr = skb->nh.ipv6h; + + if (skb->len < sizeof(struct ipv6hdr) || hdr->version != 6) + goto err; + + pkt_len = ntohs(hdr->payload_len); + + /* pkt_len may be zero if Jumbo payload option is present */ + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + goto truncated; + skb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); + } + + if (hdr->nexthdr == NEXTHDR_HOP) { + skb->h.raw = (u8*)(hdr+1); + if (!ipv6_parse_hopopts(skb, &hdr->nexthdr)) { + ipv6_statistics.Ip6InHdrErrors++; + return 0; + } + } + + if (skb->dst == NULL) + ip6_route_input(skb); + + return skb->dst->input(skb); + +truncated: + ipv6_statistics.Ip6InTruncatedPkts++; +err: + ipv6_statistics.Ip6InHdrErrors++; +drop: + kfree_skb(skb); + return 0; +} + +/* + * 0 - deliver + * 1 - block + */ +static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +{ + struct icmp6hdr *icmph; + struct raw6_opt *opt; + + opt = &sk->tp_pinfo.tp_raw; + icmph = (struct icmp6hdr *) (skb->nh.ipv6h + 1); + return test_bit(icmph->icmp6_type, &opt->filter); +} + +/* + * demultiplex raw sockets. + * (should consider queueing the skb in the sock receive_queue + * without calling rawv6.c) + */ +static struct sock * ipv6_raw_deliver(struct sk_buff *skb, + int nexthdr, unsigned long len) +{ + struct in6_addr *saddr; + struct in6_addr *daddr; + struct sock *sk, *sk2; + __u8 hash; + + saddr = &skb->nh.ipv6h->saddr; + daddr = saddr + 1; + + hash = nexthdr & (MAX_INET_PROTOS - 1); + + sk = raw_v6_htable[hash]; + + /* + * The first socket found will be delivered after + * delivery to transport protocols. + */ + + if (sk == NULL) + return NULL; + + sk = raw_v6_lookup(sk, nexthdr, daddr, saddr); + + if (sk) { + sk2 = sk; + + while ((sk2 = raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) { + struct sk_buff *buff; + + if (nexthdr == IPPROTO_ICMPV6 && + icmpv6_filter(sk2, skb)) + continue; + + buff = skb_clone(skb, GFP_ATOMIC); + if (buff) + rawv6_rcv(sk2, buff, len); + } + } + + if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb)) + sk = NULL; + + return sk; +} + +/* + * Deliver the packet to the host + */ + +int ip6_input(struct sk_buff *skb) +{ + struct ipv6hdr *hdr = skb->nh.ipv6h; + struct inet6_protocol *ipprot; + struct sock *raw_sk; + __u8 *nhptr; + int nexthdr; + int found = 0; + u8 hash; + int len; + + skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); + + /* + * Parse extension headers + */ + + nexthdr = hdr->nexthdr; + nhptr = &hdr->nexthdr; + + /* Skip hop-by-hop options, they are already parsed. */ + if (nexthdr == NEXTHDR_HOP) { + nhptr = (u8*)(hdr+1); + nexthdr = *nhptr; + skb->h.raw += (nhptr[1]+1)<<3; + } + + /* This check is sort of optimization. + It would be stupid to detect for optional headers, + which are missing with probability of 200% + */ + if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP) { + nhptr = ipv6_parse_exthdrs(&skb, nhptr); + if (nhptr == NULL) + return 0; + nexthdr = *nhptr; + hdr = skb->nh.ipv6h; + } + len = skb->tail - skb->h.raw; + + raw_sk = ipv6_raw_deliver(skb, nexthdr, len); + + hash = nexthdr & (MAX_INET_PROTOS - 1); + for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; + ipprot != NULL; + ipprot = (struct inet6_protocol *) ipprot->next) { + struct sk_buff *buff = skb; + + if (ipprot->protocol != nexthdr) + continue; + + if (ipprot->copy || raw_sk) + buff = skb_clone(skb, GFP_ATOMIC); + /* buff == NULL ?????? 
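+	   (skb_clone() above runs with GFP_ATOMIC and can return NULL
+	   under memory pressure, in which case the handler below is
+	   fed a NULL skb.)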
*/ + ipprot->handler(buff, len); + found = 1; + } + + if (raw_sk) { + rawv6_rcv(raw_sk, skb, len); + found = 1; + } + + /* + * not found: send ICMP parameter problem back + */ + if (!found) { + ipv6_statistics.Ip6InUnknownProtos++; + icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhptr); + } + + return 0; +} + +int ip6_mc_input(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + int deliver = 0; + int discard = 1; + + ipv6_statistics.Ip6InMcastPkts++; + + hdr = skb->nh.ipv6h; + if (ipv6_chk_mcast_addr(skb->dev, &hdr->daddr)) + deliver = 1; + + /* + * IPv6 multicast router mode isnt currently supported. + */ +#if 0 + if (ipv6_config.multicast_route) { + int addr_type; + + addr_type = ipv6_addr_type(&hdr->daddr); + + if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) { + struct sk_buff *skb2; + struct dst_entry *dst; + + dst = skb->dst; + + if (deliver) { + skb2 = skb_clone(skb, GFP_ATOMIC); + } else { + discard = 0; + skb2 = skb; + } + + dst->output(skb2); + } + } +#endif + + if (deliver) { + discard = 0; + ip6_input(skb); + } + + if (discard) + kfree_skb(skb); + + return 0; +} diff --git a/pfinet/linux-src/net/ipv6/ip6_output.c b/pfinet/linux-src/net/ipv6/ip6_output.c new file mode 100644 index 00000000..f67e3e9e --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ip6_output.c @@ -0,0 +1,720 @@ +/* + * IPv6 output functions + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: ip6_output.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * Based on linux/net/ipv4/ip_output.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * A.N.Kuznetsov : airthmetics in fragmentation. + * extension headers are implemented. + * route changes now work. + * ip6_forward does not confuse sniffers. + * etc. + * + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/in6.h> +#include <linux/route.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/protocol.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/rawv6.h> +#include <net/icmp.h> + +static u32 ipv6_fragmentation_id = 1; + +int ip6_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct device *dev = dst->dev; + struct hh_cache *hh = dst->hh; + + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->dev = dev; + + if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) { + if (!(dev->flags&IFF_LOOPBACK) && + (skb->sk == NULL || skb->sk->net_pinfo.af_inet6.mc_loop) && + ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr)) { + /* Do not check for IFF_ALLMULTI; multicast routing + is not supported in any case. + */ + dev_loopback_xmit(skb); + + if (skb->nh.ipv6h->hop_limit == 0) { + kfree_skb(skb); + return 0; + } + } + + ipv6_statistics.Ip6OutMcastPkts++; + } + + if (hh) { +#ifdef __alpha__ + /* Alpha has disguisting memcpy. Help it. 
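+	   The 16-byte hardware header is copied as two aligned 64-bit
+	   words; this assumes hh_data is 8-byte aligned and that the
+	   hard header fits in 16 bytes.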
*/
+		u64 *aligned_hdr = (u64*)(skb->data - 16);
+		u64 *aligned_hdr0 = hh->hh_data;
+		read_lock_irq(&hh->hh_lock);
+		aligned_hdr[0] = aligned_hdr0[0];
+		aligned_hdr[1] = aligned_hdr0[1];
+#else
+		read_lock_irq(&hh->hh_lock);
+		memcpy(skb->data - 16, hh->hh_data, 16);
+#endif
+		read_unlock_irq(&hh->hh_lock);
+		skb_push(skb, dev->hard_header_len);
+		return hh->hh_output(skb);
+	} else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/*
+ *	xmit an sk_buff (used by TCP)
+ */
+
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+	     struct ipv6_txoptions *opt)
+{
+	struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
+	struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
+	struct dst_entry *dst = skb->dst;
+	struct ipv6hdr *hdr;
+	u8 proto = fl->proto;
+	int seg_len = skb->len;
+	int hlimit;
+
+	if (opt) {
+		int head_room;
+
+		/* First: exthdrs may take lots of space (~8K for now);
+		   MAX_HEADER is not enough.
+		 */
+		head_room = opt->opt_nflen + opt->opt_flen;
+		seg_len += head_room;
+		head_room += sizeof(struct ipv6hdr) + ((dst->dev->hard_header_len + 15)&~15);
+
+		if (skb_headroom(skb) < head_room) {
+			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+			kfree_skb(skb);
+			skb = skb2;
+			if (skb == NULL)
+				return -ENOBUFS;
+			if (sk)
+				skb_set_owner_w(skb, sk);
+		}
+		if (opt->opt_flen)
+			ipv6_push_frag_opts(skb, opt, &proto);
+		if (opt->opt_nflen)
+			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+	}
+
+	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+
+	/*
+	 *	Fill in the IPv6 header
+	 */
+
+	*(u32*)hdr = __constant_htonl(0x60000000) | fl->fl6_flowlabel;
+	hlimit = -1;
+	if (np)
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+
+	hdr->payload_len = htons(seg_len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = hlimit;
+
+	ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
+	ipv6_addr_copy(&hdr->daddr, first_hop);
+
+	if (skb->len <= dst->pmtu) {
+		ipv6_statistics.Ip6OutRequests++;
+		dst->output(skb);
+		return 0;
+	}
+
+	printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+	start_bh_atomic();
+	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
+	end_bh_atomic();
+	kfree_skb(skb);
+	return -EMSGSIZE;
+}
+
+/*
+ *	To avoid extra problems ND packets are sent through this
+ *	routine.
It's code duplication but I really want to avoid + * extra checks since ipv6_build_header is used by TCP (which + * is for us performace critical) + */ + +int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6hdr *hdr; + int totlen; + + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->dev = dev; + + totlen = len + sizeof(struct ipv6hdr); + + hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + skb->nh.ipv6h = hdr; + + *(u32*)hdr = htonl(0x60000000); + + hdr->payload_len = htons(len); + hdr->nexthdr = proto; + hdr->hop_limit = np->hop_limit; + + ipv6_addr_copy(&hdr->saddr, saddr); + ipv6_addr_copy(&hdr->daddr, daddr); + + return 0; +} + +static struct ipv6hdr * ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl, + int hlimit, unsigned pktlength) +{ + struct ipv6hdr *hdr; + + skb->nh.raw = skb_put(skb, sizeof(struct ipv6hdr)); + hdr = skb->nh.ipv6h; + + *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + + hdr->payload_len = htons(pktlength - sizeof(struct ipv6hdr)); + hdr->hop_limit = hlimit; + hdr->nexthdr = fl->proto; + + ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr); + ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr); + return hdr; +} + +static __inline__ u8 * ipv6_build_fraghdr(struct sk_buff *skb, u8* prev_hdr, unsigned offset) +{ + struct frag_hdr *fhdr; + + fhdr = (struct frag_hdr *) skb_put(skb, sizeof(struct frag_hdr)); + + fhdr->nexthdr = *prev_hdr; + *prev_hdr = NEXTHDR_FRAGMENT; + prev_hdr = &fhdr->nexthdr; + + fhdr->reserved = 0; + fhdr->frag_off = htons(offset); + fhdr->identification = ipv6_fragmentation_id++; + return &fhdr->nexthdr; +} + +static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag, + const void *data, struct dst_entry *dst, + struct flowi *fl, struct ipv6_txoptions *opt, + struct in6_addr *final_dst, + int hlimit, int flags, unsigned length, int mtu) +{ + struct ipv6hdr *hdr; + struct sk_buff *last_skb; + u8 *prev_hdr; + int unfrag_len; + int frag_len; + int last_len; + int nfrags; + int fhdr_dist; + int frag_off; + int data_off; + int err; + + /* + * Fragmentation + * + * Extension header order: + * Hop-by-hop -> Dest0 -> Routing -> Fragment -> Auth -> Dest1 -> rest (...) + * + * We must build the non-fragmented part that + * will be in every packet... this also means + * that other extension headers (Dest, Auth, etc) + * must be considered in the data to be fragmented + */ + + unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr); + last_len = length; + + if (opt) { + unfrag_len += opt->opt_nflen; + last_len += opt->opt_flen; + } + + /* + * Length of fragmented part on every packet but + * the last must be an: + * "integer multiple of 8 octects". + */ + + frag_len = (mtu - unfrag_len) & ~0x7; + + /* Unfragmentable part exceeds mtu. */ + if (frag_len <= 0) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu); + return -EMSGSIZE; + } + + nfrags = last_len / frag_len; + + /* + * We must send from end to start because of + * UDP/ICMP checksums. We do a funny trick: + * fill the last skb first with the fixed + * header (and its data) and then use it + * to create the following segments and send it + * in the end. If the peer is checking the M_flag + * to trigger the reassembly code then this + * might be a good idea. 
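+ *
+ *	Illustrative numbers (hypothetical, not from a real trace):
+ *	with mtu = 1480, length = 3000 and no options, unfrag_len is
+ *	40 + 8 = 48 and frag_len = (1480 - 48) & ~7 = 1432, so two
+ *	full fragments are created plus a last fragment of
+ *	3000 - 2*1432 = 136 bytes, which is the one filled in first.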
+ */ + + frag_off = nfrags * frag_len; + last_len -= frag_off; + + if (last_len == 0) { + last_len = frag_len; + frag_off -= frag_len; + nfrags--; + } + data_off = frag_off; + + /* And it is implementation problem: for now we assume, that + all the exthdrs will fit to the first fragment. + */ + if (opt) { + if (frag_len < opt->opt_flen) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu); + return -EMSGSIZE; + } + data_off = frag_off - opt->opt_flen; + } + + last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len + + dst->dev->hard_header_len + 15, + 0, flags & MSG_DONTWAIT, &err); + + if (last_skb == NULL) + return err; + + last_skb->dst = dst_clone(dst); + + skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15); + + hdr = ip6_bld_1(sk, last_skb, fl, hlimit, frag_len+unfrag_len); + prev_hdr = &hdr->nexthdr; + + if (opt && opt->opt_nflen) + prev_hdr = ipv6_build_nfrag_opts(last_skb, prev_hdr, opt, final_dst, 0); + + prev_hdr = ipv6_build_fraghdr(last_skb, prev_hdr, frag_off); + fhdr_dist = prev_hdr - last_skb->data; + + err = getfrag(data, &hdr->saddr, last_skb->tail, data_off, last_len); + + if (!err) { + while (nfrags--) { + struct sk_buff *skb; + + struct frag_hdr *fhdr2; + + skb = skb_copy(last_skb, sk->allocation); + + if (skb == NULL) { + ipv6_statistics.Ip6FragFails++; + kfree_skb(last_skb); + return -ENOMEM; + } + + frag_off -= frag_len; + data_off -= frag_len; + + fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist); + + /* more flag on */ + fhdr2->frag_off = htons(frag_off | 1); + + /* Write fragmentable exthdrs to the first chunk */ + if (nfrags == 0 && opt && opt->opt_flen) { + ipv6_build_frag_opts(skb, &fhdr2->nexthdr, opt); + frag_len -= opt->opt_flen; + data_off = 0; + } + + err = getfrag(data, &hdr->saddr,skb_put(skb, frag_len), + data_off, frag_len); + + if (err) { + kfree_skb(skb); + break; + } + + ipv6_statistics.Ip6FragCreates++; + ipv6_statistics.Ip6OutRequests++; + dst->output(skb); + } + } + + if (err) { + ipv6_statistics.Ip6FragFails++; + kfree_skb(last_skb); + return -EFAULT; + } + + hdr->payload_len = htons(unfrag_len + last_len - sizeof(struct ipv6hdr)); + + /* + * update last_skb to reflect the getfrag we did + * on start. + */ + + skb_put(last_skb, last_len); + + ipv6_statistics.Ip6FragCreates++; + ipv6_statistics.Ip6FragOKs++; + ipv6_statistics.Ip6OutRequests++; + dst->output(last_skb); + + return 0; +} + +int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, + struct flowi *fl, unsigned length, + struct ipv6_txoptions *opt, int hlimit, int flags) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct in6_addr *final_dst = NULL; + struct dst_entry *dst; + int err = 0; + unsigned int pktlength, jumbolen, mtu; + struct in6_addr saddr; + + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + final_dst = fl->fl6_dst; + fl->fl6_dst = rt0->addr; + } + + if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr)) + fl->oif = np->mcast_oif; + + dst = NULL; + if (sk->dst_cache) { + dst = dst_check(&sk->dst_cache, np->dst_cookie); + if (dst) { + struct rt6_info *rt = (struct rt6_info*)dst_clone(dst); + + /* Yes, checking route validity in not connected + case is not very simple. Take into account, + that we do not support routing by source, TOS, + and MSG_DONTROUTE --ANK (980726) + + 1. If route was host route, check that + cached destination is current. + If it is network route, we still may + check its validity using saved pointer + to the last used address: daddr_cache. 
+ We do not want to save whole address now, + (because main consumer of this service + is tcp, which has not this problem), + so that the last trick works only on connected + sockets. + 2. oif also should be the same. + */ + if (((rt->rt6i_dst.plen != 128 || + ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; + } + } + } + + if (dst == NULL) + dst = ip6_route_output(sk, fl); + + if (dst->error) { + ipv6_statistics.Ip6OutNoRoutes++; + dst_release(dst); + return -ENETUNREACH; + } + + if (fl->fl6_src == NULL) { + err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr); + + if (err) { +#if IP6_DEBUG >= 2 + printk(KERN_DEBUG "ip6_build_xmit: " + "no availiable source address\n"); +#endif + goto out; + } + fl->fl6_src = &saddr; + } + pktlength = length; + + if (hlimit < 0) { + if (ipv6_addr_is_multicast(fl->fl6_dst)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit; + } + + jumbolen = 0; + + if (!sk->ip_hdrincl) { + pktlength += sizeof(struct ipv6hdr); + if (opt) + pktlength += opt->opt_flen + opt->opt_nflen; + + if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) { + /* Jumbo datagram. + It is assumed, that in the case of sk->ip_hdrincl + jumbo option is supplied by user. + */ + pktlength += 8; + jumbolen = pktlength - sizeof(struct ipv6hdr); + } + } + + mtu = dst->pmtu; + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + else if (np->pmtudisc == IPV6_PMTUDISC_DONT) + mtu = IPV6_MIN_MTU; + } + + /* Critical arithmetic overflow check. + FIXME: may gcc optimize it out? --ANK (980726) + */ + if (pktlength < length) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu); + err = -EMSGSIZE; + goto out; + } + + if (pktlength <= mtu) { + struct sk_buff *skb; + struct ipv6hdr *hdr; + struct device *dev = dst->dev; + + skb = sock_alloc_send_skb(sk, pktlength + 15 + + dev->hard_header_len, 0, + flags & MSG_DONTWAIT, &err); + + if (skb == NULL) { + ipv6_statistics.Ip6OutDiscards++; + goto out; + } + + skb->dst = dst_clone(dst); + + skb_reserve(skb, (dev->hard_header_len + 15) & ~15); + + hdr = (struct ipv6hdr *) skb->tail; + skb->nh.ipv6h = hdr; + + if (!sk->ip_hdrincl) { + ip6_bld_1(sk, skb, fl, hlimit, + jumbolen ? sizeof(struct ipv6hdr) : pktlength); + + if (opt || jumbolen) { + u8 *prev_hdr = &hdr->nexthdr; + prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen); + if (opt && opt->opt_flen) + ipv6_build_frag_opts(skb, prev_hdr, opt); + } + } + + skb_put(skb, length); + err = getfrag(data, &hdr->saddr, + ((char *) hdr) + (pktlength - length), + 0, length); + + if (!err) { + ipv6_statistics.Ip6OutRequests++; + dst->output(skb); + } else { + err = -EFAULT; + kfree_skb(skb); + } + } else { + if (sk->ip_hdrincl || jumbolen || + np->pmtudisc == IPV6_PMTUDISC_DO) { + ipv6_local_error(sk, EMSGSIZE, fl, mtu); + err = -EMSGSIZE; + goto out; + } + + err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit, + flags, length, mtu); + } + + /* + * cleanup + */ +out: + ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? 
&np->daddr : NULL); + return err; +} + +int ip6_call_ra_chain(struct sk_buff *skb, int sel) +{ + struct ip6_ra_chain *ra; + struct sock *last = NULL; + + for (ra = ip6_ra_chain; ra; ra = ra->next) { + struct sock *sk = ra->sk; + if (sk && ra->sel == sel) { + if (last) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + rawv6_rcv(last, skb2, skb2->len); + } + last = sk; + } + } + + if (last) { + rawv6_rcv(last, skb, skb->len); + return 1; + } + return 0; +} + +int ip6_forward(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct ipv6hdr *hdr = skb->nh.ipv6h; + struct inet6_skb_parm *opt =(struct inet6_skb_parm*)skb->cb; + + if (ipv6_devconf.forwarding == 0 && opt->srcrt == 0) + goto drop; + + /* + * We DO NOT make any processing on + * RA packets, pushing them to user level AS IS + * without ane WARRANTY that application will be able + * to interpret them. The reason is that we + * cannot make anything clever here. + * + * We are not end-node, so that if packet contains + * AH/ESP, we cannot make anything. + * Defragmentation also would be mistake, RA packets + * cannot be fragmented, because there is no warranty + * that different fragments will go along one path. --ANK + */ + if (opt->ra) { + u8 *ptr = skb->nh.raw + opt->ra; + if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) + return 0; + } + + /* + * check and decrement ttl + */ + if (hdr->hop_limit <= 1) { + /* Force OUTPUT device used as source address */ + skb->dev = dst->dev; + icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, + 0, skb->dev); + + kfree_skb(skb); + return -ETIMEDOUT; + } + + /* IPv6 specs say nothing about it, but it is clear that we cannot + send redirects to source routed frames. + */ + if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) { + struct in6_addr *target = NULL; + struct rt6_info *rt; + struct neighbour *n = dst->neighbour; + + /* + * incoming and outgoing devices are the same + * send a redirect. + */ + + rt = (struct rt6_info *) dst; + if ((rt->rt6i_flags & RTF_GATEWAY)) + target = (struct in6_addr*)&n->primary_key; + else + target = &hdr->daddr; + + /* Limit redirects both by destination (here) + and by source (inside ndisc_send_redirect) + */ + if (xrlim_allow(dst, 1*HZ)) + ndisc_send_redirect(skb, n, target); + } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK + |IPV6_ADDR_LINKLOCAL)) { + /* This check is security critical. 
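+		   Forwarding a packet whose source address is
+		   multicast, loopback or link-local would let it
+		   escape its origin scope, so it is dropped instead.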
*/ + goto drop; + } + + if (skb->len > dst->pmtu) { + /* Again, force OUTPUT device used as source address */ + skb->dev = dst->dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev); + ipv6_statistics.Ip6InTooBigErrors++; + kfree_skb(skb); + return -EMSGSIZE; + } + + if ((skb = skb_cow(skb, dst->dev->hard_header_len)) == NULL) + return 0; + + hdr = skb->nh.ipv6h; + + /* Mangling hops number delayed to point after skb COW */ + + hdr->hop_limit--; + + ipv6_statistics.Ip6OutForwDatagrams++; + return dst->output(skb); + +drop: + ipv6_statistics.Ip6InAddrErrors++; + kfree_skb(skb); + return -EINVAL; +} diff --git a/pfinet/linux-src/net/ipv6/ipv6_sockglue.c b/pfinet/linux-src/net/ipv6/ipv6_sockglue.c new file mode 100644 index 00000000..6a48d1be --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ipv6_sockglue.c @@ -0,0 +1,439 @@ +/* + * IPv6 BSD socket options interface + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * Based on linux/net/ipv4/ip_sockglue.c + * + * $Id: ipv6_sockglue.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * FIXME: Make the setsockopt code POSIX compliant: That is + * + * o Return -EINVAL for setsockopt of short lengths + * o Truncate getsockopt returns + * o Return an optlen of the truncated length if need be + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/init.h> +#include <linux/sysctl.h> + +#include <net/sock.h> +#include <net/snmp.h> +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/inet_common.h> +#include <net/tcp.h> +#include <net/udp.h> + +#include <asm/uaccess.h> + +struct ipv6_mib ipv6_statistics={0, }; +struct packet_type ipv6_packet_type = +{ + __constant_htons(ETH_P_IPV6), + NULL, /* All devices */ + ipv6_rcv, + NULL, + NULL +}; + +/* + * addrconf module should be notifyed of a device going up + */ +static struct notifier_block ipv6_dev_notf = { + addrconf_notify, + NULL, + 0 +}; + +struct ip6_ra_chain *ip6_ra_chain; + +int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) +{ + struct ip6_ra_chain *ra, *new_ra, **rap; + + /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ + if (sk->type != SOCK_RAW || sk->num != IPPROTO_RAW) + return -EINVAL; + + new_ra = (sel>=0) ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + + for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + if (ra->sk == sk) { + if (sel>=0) { + if (new_ra) + kfree(new_ra); + return -EADDRINUSE; + } + + *rap = ra->next; + synchronize_bh(); + + if (ra->destructor) + ra->destructor(sk); + kfree(ra); + return 0; + } + } + if (new_ra == NULL) + return -ENOBUFS; + new_ra->sk = sk; + new_ra->sel = sel; + new_ra->destructor = destructor; + start_bh_atomic(); + new_ra->next = ra; + *rap = new_ra; + end_bh_atomic(); + return 0; +} + + +int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, + int optlen) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int val, valbool; + int retv = -ENOPROTOOPT; + + if(level==SOL_IP && sk->type != SOCK_RAW) + return udp_prot.setsockopt(sk, level, optname, optval, optlen); + + if(level!=SOL_IPV6) + goto out; + + if (optval == NULL) + val=0; + else if (get_user(val, (int *) optval)) + return -EFAULT; + + valbool = (val!=0); + + switch (optname) { + + case IPV6_ADDRFORM: + if (val == PF_INET) { + struct ipv6_txoptions *opt; + struct sk_buff *pktopt; + + if (sk->protocol != IPPROTO_UDP && + sk->protocol != IPPROTO_TCP) + goto out; + + lock_sock(sk); + if (sk->state != TCP_ESTABLISHED) { + retv = ENOTCONN; + goto addrform_done; + } + + if (!(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) { + retv = -EADDRNOTAVAIL; + goto addrform_done; + } + + fl6_free_socklist(sk); + ipv6_sock_mc_close(sk); + + if (sk->protocol == IPPROTO_TCP) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + sk->prot = &tcp_prot; + tp->af_specific = &ipv4_specific; + sk->socket->ops = &inet_stream_ops; + sk->family = PF_INET; + tcp_sync_mss(sk, tp->pmtu_cookie); + } else { + sk->prot = &udp_prot; + sk->socket->ops = &inet_dgram_ops; + } + opt = xchg(&np->opt, NULL); + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + pktopt = xchg(&np->pktoptions, NULL); + if (pktopt) + kfree_skb(pktopt); + retv = 0; + +addrform_done: + release_sock(sk); + } else { + retv = -EINVAL; + } + break; + + case IPV6_PKTINFO: + np->rxopt.bits.rxinfo = valbool; + retv = 0; + break; + + case IPV6_HOPLIMIT: + np->rxopt.bits.rxhlim = valbool; + retv = 0; + break; + + case IPV6_RTHDR: + retv = -EINVAL; + if (val >= 0 && val <= 2) { + np->rxopt.bits.srcrt = val; + retv = 0; + } + break; + + case IPV6_HOPOPTS: + np->rxopt.bits.hopopts = valbool; + retv = 0; + break; + + case IPV6_AUTHHDR: + np->rxopt.bits.authhdr = valbool; + retv = 0; + break; + + case IPV6_DSTOPTS: + np->rxopt.bits.dstopts = valbool; + retv = 0; + break; + + case IPV6_FLOWINFO: + np->rxopt.bits.rxflow = valbool; + return 0; + + case IPV6_PKTOPTIONS: + { + struct ipv6_txoptions *opt = NULL; + struct msghdr msg; + struct flowi fl; + int junk; + + fl.fl6_flowlabel = 0; + fl.oif = sk->bound_dev_if; + + if (optlen == 0) + goto update; + + opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); + retv = -ENOBUFS; + if (opt == NULL) + break; + + memset(opt, 0, sizeof(*opt)); + opt->tot_len = sizeof(*opt) + optlen; + retv = -EFAULT; + if (copy_from_user(opt+1, optval, optlen)) + goto done; + + msg.msg_controllen = optlen; + msg.msg_control = (void*)(opt+1); + + retv = datagram_send_ctl(&msg, &fl, opt, &junk); + if (retv) + goto done; +update: + retv = 0; + start_bh_atomic(); + if (opt && sk->type == SOCK_STREAM) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + if ((tcp_connected(sk->state) || sk->state == TCP_SYN_SENT) + && sk->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, 
tp->pmtu_cookie); + } + } + opt = xchg(&np->opt, opt); + dst_release(xchg(&sk->dst_cache, NULL)); + end_bh_atomic(); + +done: + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + break; + } + case IPV6_UNICAST_HOPS: + if (val > 255 || val < -1) + retv = -EINVAL; + else { + np->hop_limit = val; + retv = 0; + } + break; + + case IPV6_MULTICAST_HOPS: + if (val > 255 || val < -1) + retv = -EINVAL; + else { + np->mcast_hops = val; + retv = 0; + } + break; + + case IPV6_MULTICAST_LOOP: + np->mc_loop = valbool; + retv = 0; + break; + + case IPV6_MULTICAST_IF: + if (sk->bound_dev_if && sk->bound_dev_if != val) { + retv = -EINVAL; + break; + } + if (dev_get_by_index(val) == NULL) { + retv = -ENODEV; + break; + } + np->mcast_oif = val; + retv = 0; + break; + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + { + struct ipv6_mreq mreq; + + if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) + return -EFAULT; + + if (optname == IPV6_ADD_MEMBERSHIP) + retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + else + retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + break; + } + case IPV6_ROUTER_ALERT: + retv = ip6_ra_control(sk, val, NULL); + break; + case IPV6_MTU_DISCOVER: + if (val<0 || val>2) + return -EINVAL; + np->pmtudisc = val; + return 0; + case IPV6_MTU: + if (val && val < IPV6_MIN_MTU) + return -EINVAL; + np->frag_size = val; + return 0; + case IPV6_RECVERR: + np->recverr = valbool; + if (!val) + skb_queue_purge(&sk->error_queue); + return 0; + case IPV6_FLOWINFO_SEND: + np->sndflow = valbool; + return 0; + case IPV6_FLOWLABEL_MGR: + return ipv6_flowlabel_opt(sk, optval, optlen); + }; + +out: + return retv; +} + +int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, + int *optlen) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int len; + int val; + + if(level==SOL_IP && sk->type != SOCK_RAW) + return udp_prot.getsockopt(sk, level, optname, optval, optlen); + if(level!=SOL_IPV6) + return -ENOPROTOOPT; + if (get_user(len, optlen)) + return -EFAULT; + switch (optname) { + case IPV6_PKTOPTIONS: + { + struct msghdr msg; + struct sk_buff *skb; + + start_bh_atomic(); + skb = np->pktoptions; + if (skb) + atomic_inc(&skb->users); + end_bh_atomic(); + + if (skb) { + int err; + + msg.msg_control = optval; + msg.msg_controllen = len; + msg.msg_flags = 0; + err = datagram_recv_ctl(sk, &msg, skb); + kfree_skb(skb); + if (err) + return err; + len -= msg.msg_controllen; + } else + len = 0; + return put_user(len, optlen); + } + case IP_MTU: + val = 0; + lock_sock(sk); + if (sk->dst_cache) + val = sk->dst_cache->pmtu; + release_sock(sk); + if (!val) + return -ENOTCONN; + break; + default: + return -EINVAL; + } + len=min(sizeof(int),len); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval,&val,len)) + return -EFAULT; + return 0; +} + +#if defined(MODULE) && defined(CONFIG_SYSCTL) + +/* + * sysctl registration functions defined in sysctl_net_ipv6.c + */ + +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + +__initfunc(void ipv6_packet_init(void)) +{ + dev_add_pack(&ipv6_packet_type); +} + +__initfunc(void ipv6_netdev_notif_init(void)) +{ + register_netdevice_notifier(&ipv6_dev_notf); +} + +#ifdef MODULE +void ipv6_packet_cleanup(void) +{ + dev_remove_pack(&ipv6_packet_type); +} + +void ipv6_netdev_notif_cleanup(void) +{ + unregister_netdevice_notifier(&ipv6_dev_notf); +} +#endif diff --git a/pfinet/linux-src/net/ipv6/mcast.c b/pfinet/linux-src/net/ipv6/mcast.c 
new file mode 100644 index 00000000..27d1d316 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/mcast.c @@ -0,0 +1,709 @@ +/* + * Multicast support for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: mcast.c,v 1.1 2007/10/08 21:12:30 stesie Exp $ + * + * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define __NO_VERSION__ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/route.h> +#include <linux/init.h> +#include <linux/proc_fs.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/if_inet6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> + +#include <net/checksum.h> + +/* Set to 3 to get tracing... */ +#define MCAST_DEBUG 2 + +#if MCAST_DEBUG >= 3 +#define MDBG(x) printk x +#else +#define MDBG(x) +#endif + +static struct socket *igmp6_socket; + +static void igmp6_join_group(struct ifmcaddr6 *ma); +static void igmp6_leave_group(struct ifmcaddr6 *ma); +void igmp6_timer_handler(unsigned long data); + +#define IGMP6_UNSOLICITED_IVAL (10*HZ) + +/* + * Hash list of configured multicast addresses + */ +static struct ifmcaddr6 *inet6_mcast_lst[IN6_ADDR_HSIZE]; + +/* + * socket join on multicast group + */ + +int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) +{ + struct device *dev = NULL; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int err; + + if (!(ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)) + return -EINVAL; + + mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); + + if (mc_lst == NULL) + return -ENOMEM; + + mc_lst->next = NULL; + memcpy(&mc_lst->addr, addr, sizeof(struct in6_addr)); + mc_lst->ifindex = ifindex; + + if (ifindex == 0) { + struct rt6_info *rt; + rt = rt6_lookup(addr, NULL, 0, 0); + if (rt) { + dev = rt->rt6i_dev; + dst_release(&rt->u.dst); + } + } else + dev = dev_get_by_index(ifindex); + + if (dev == NULL) { + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + return -ENODEV; + } + + /* + * now add/increase the group membership on the device + */ + + err = ipv6_dev_mc_inc(dev, addr); + + if (err) { + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + return err; + } + + mc_lst->next = np->ipv6_mc_list; + np->ipv6_mc_list = mc_lst; + + return 0; +} + +/* + * socket leave on multicast group + */ +int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_mc_socklist *mc_lst, **lnk; + + for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + if (mc_lst->ifindex == ifindex && + ipv6_addr_cmp(&mc_lst->addr, addr) == 0) { + struct device *dev; + + *lnk = mc_lst->next; + synchronize_bh(); + + if ((dev = dev_get_by_index(ifindex)) != NULL) + ipv6_dev_mc_dec(dev, &mc_lst->addr); + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + return 0; + } + } + + return -ENOENT; +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo 
*np = &sk->net_pinfo.af_inet6; + struct ipv6_mc_socklist *mc_lst; + + while ((mc_lst = np->ipv6_mc_list) != NULL) { + struct device *dev = dev_get_by_index(mc_lst->ifindex); + + if (dev) + ipv6_dev_mc_dec(dev, &mc_lst->addr); + + np->ipv6_mc_list = mc_lst->next; + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + } +} + +static int igmp6_group_added(struct ifmcaddr6 *mc) +{ + char buf[MAX_ADDR_LEN]; + + if (!(mc->mca_flags&MAF_LOADED)) { + mc->mca_flags |= MAF_LOADED; + if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0) + dev_mc_add(mc->dev, buf, mc->dev->addr_len, 0); + } + + if (mc->dev->flags&IFF_UP) + igmp6_join_group(mc); + return 0; +} + +static int igmp6_group_dropped(struct ifmcaddr6 *mc) +{ + char buf[MAX_ADDR_LEN]; + + if (mc->mca_flags&MAF_LOADED) { + mc->mca_flags &= ~MAF_LOADED; + if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0) + dev_mc_delete(mc->dev, buf, mc->dev->addr_len, 0); + } + + if (mc->dev->flags&IFF_UP) + igmp6_leave_group(mc); + return 0; +} + + +/* + * device multicast group inc (add if not found) + */ +int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) +{ + struct ifmcaddr6 *mc; + struct inet6_dev *idev; + int hash; + + idev = ipv6_get_idev(dev); + + if (idev == NULL) + return -EINVAL; + + hash = ipv6_addr_hash(addr); + + for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) { + if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0 && mc->dev == dev) { + atomic_inc(&mc->mca_users); + return 0; + } + } + + /* + * not found: create a new one. + */ + + mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC); + + if (mc == NULL) + return -ENOMEM; + + memset(mc, 0, sizeof(struct ifmcaddr6)); + mc->mca_timer.function = igmp6_timer_handler; + mc->mca_timer.data = (unsigned long) mc; + + memcpy(&mc->mca_addr, addr, sizeof(struct in6_addr)); + mc->dev = dev; + atomic_set(&mc->mca_users, 1); + + mc->next = inet6_mcast_lst[hash]; + inet6_mcast_lst[hash] = mc; + + mc->if_next = idev->mc_list; + idev->mc_list = mc; + + igmp6_group_added(mc); + + return 0; +} + +static void ipv6_mca_remove(struct device *dev, struct ifmcaddr6 *ma) +{ + struct inet6_dev *idev; + + idev = ipv6_get_idev(dev); + + if (idev) { + struct ifmcaddr6 *iter, **lnk; + + for (lnk = &idev->mc_list; (iter = *lnk) != NULL; lnk = &iter->if_next) { + if (iter == ma) { + *lnk = iter->if_next; + synchronize_bh(); + return; + } + } + } +} + +/* + * device multicast group del + */ +int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr) +{ + struct ifmcaddr6 *ma, **lnk; + int hash; + + hash = ipv6_addr_hash(addr); + + for (lnk = &inet6_mcast_lst[hash]; (ma=*lnk) != NULL; lnk = &ma->next) { + if (ipv6_addr_cmp(&ma->mca_addr, addr) == 0 && ma->dev == dev) { + if (atomic_dec_and_test(&ma->mca_users)) { + igmp6_group_dropped(ma); + + *lnk = ma->next; + synchronize_bh(); + + ipv6_mca_remove(dev, ma); + kfree(ma); + } + return 0; + } + } + + return -ENOENT; +} + +/* + * check if the interface/address pair is valid + */ +int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr) +{ + struct ifmcaddr6 *mc; + int hash; + + hash = ipv6_addr_hash(addr); + + for (mc = inet6_mcast_lst[hash]; mc; mc=mc->next) { + if (mc->dev == dev && ipv6_addr_cmp(&mc->mca_addr, addr) == 0) + return 1; + } + + return 0; +} + +/* + * IGMP handling (alias multicast ICMPv6 messages) + */ + +static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) +{ + unsigned long delay = resptime; + + /* Do not start timer for addresses with link/host scope */ + if 
(ipv6_addr_type(&ma->mca_addr)&(IPV6_ADDR_LINKLOCAL|IPV6_ADDR_LOOPBACK)) + return; + + if (del_timer(&ma->mca_timer)) + delay = ma->mca_timer.expires - jiffies; + + if (delay >= resptime) { + if (resptime) + delay = net_random() % resptime; + else + delay = 1; + } + + ma->mca_flags |= MAF_TIMER_RUNNING; + ma->mca_timer.expires = jiffies + delay; + add_timer(&ma->mca_timer); +} + +int igmp6_event_query(struct sk_buff *skb, struct icmp6hdr *hdr, int len) +{ + struct ifmcaddr6 *ma; + struct in6_addr *addrp; + unsigned long resptime; + + if (len < sizeof(struct icmp6hdr) + sizeof(struct in6_addr)) + return -EINVAL; + + /* Drop queries with not link local source */ + if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + resptime = ntohs(hdr->icmp6_maxdelay); + /* Translate milliseconds to jiffies */ + resptime = (resptime<<10)/(1024000/HZ); + + addrp = (struct in6_addr *) (hdr + 1); + + if (ipv6_addr_any(addrp)) { + struct inet6_dev *idev; + + idev = ipv6_get_idev(skb->dev); + + if (idev == NULL) + return 0; + + for (ma = idev->mc_list; ma; ma=ma->if_next) + igmp6_group_queried(ma, resptime); + } else { + int hash = ipv6_addr_hash(addrp); + + for (ma = inet6_mcast_lst[hash]; ma; ma=ma->next) { + if (ma->dev == skb->dev && + ipv6_addr_cmp(addrp, &ma->mca_addr) == 0) { + igmp6_group_queried(ma, resptime); + break; + } + } + } + + return 0; +} + + +int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len) +{ + struct ifmcaddr6 *ma; + struct in6_addr *addrp; + struct device *dev; + int hash; + + /* Our own report looped back. Ignore it. */ + if (skb->pkt_type == PACKET_LOOPBACK) + return 0; + + if (len < sizeof(struct icmp6hdr) + sizeof(struct in6_addr)) + return -EINVAL; + + /* Drop reports with not link local source */ + if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + addrp = (struct in6_addr *) (hdr + 1); + + dev = skb->dev; + + /* + * Cancel the timer for this group + */ + + hash = ipv6_addr_hash(addrp); + + for (ma = inet6_mcast_lst[hash]; ma; ma=ma->next) { + if ((ma->dev == dev) && ipv6_addr_cmp(&ma->mca_addr, addrp) == 0) { + if (ma->mca_flags & MAF_TIMER_RUNNING) { + del_timer(&ma->mca_timer); + ma->mca_flags &= ~MAF_TIMER_RUNNING; + } + + ma->mca_flags &= ~MAF_LAST_REPORTER; + break; + } + } + + return 0; +} + +void igmp6_send(struct in6_addr *addr, struct device *dev, int type) +{ + struct sock *sk = igmp6_socket->sk; + struct sk_buff *skb; + struct icmp6hdr *hdr; + struct inet6_ifaddr *ifp; + struct in6_addr *snd_addr; + struct in6_addr *addrp; + struct in6_addr all_routers; + int err, len, payload_len, full_len; + u8 ra[8] = { IPPROTO_ICMPV6, 0, + IPV6_TLV_ROUTERALERT, 0, 0, 0, + IPV6_TLV_PADN, 0 }; + + snd_addr = addr; + if (type == ICMPV6_MGM_REDUCTION) { + snd_addr = &all_routers; + ipv6_addr_all_routers(&all_routers); + } + + len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + payload_len = len + sizeof(ra); + full_len = sizeof(struct ipv6hdr) + payload_len; + + skb = sock_alloc_send_skb(sk, dev->hard_header_len + full_len + 15, 0, 0, &err); + + if (skb == NULL) + return; + + skb_reserve(skb, (dev->hard_header_len + 15) & ~15); + if (dev->hard_header) { + unsigned char ha[MAX_ADDR_LEN]; + ndisc_mc_map(snd_addr, ha, dev, 1); + dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len); + } + + ifp = ipv6_get_lladdr(dev); + + if (ifp == NULL) { +#if MCAST_DEBUG >= 1 + printk(KERN_DEBUG "igmp6: %s no linklocal address\n", + dev->name); +#endif + return; + } + + ip6_nd_hdr(sk, skb, dev, 
&ifp->addr, snd_addr, NEXTHDR_HOP, payload_len); + + memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); + + hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr)); + memset(hdr, 0, sizeof(struct icmp6hdr)); + hdr->icmp6_type = type; + + addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr)); + ipv6_addr_copy(addrp, addr); + + hdr->icmp6_cksum = csum_ipv6_magic(&ifp->addr, snd_addr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) hdr, len, 0)); + + dev_queue_xmit(skb); + if (type == ICMPV6_MGM_REDUCTION) + icmpv6_statistics.Icmp6OutGroupMembReductions++; + else + icmpv6_statistics.Icmp6OutGroupMembResponses++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +static void igmp6_join_group(struct ifmcaddr6 *ma) +{ + unsigned long delay; + int addr_type; + + addr_type = ipv6_addr_type(&ma->mca_addr); + + if ((addr_type & (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_LOOPBACK))) + return; + + start_bh_atomic(); + igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT); + + delay = net_random() % IGMP6_UNSOLICITED_IVAL; + if (del_timer(&ma->mca_timer)) + delay = ma->mca_timer.expires - jiffies; + + ma->mca_timer.expires = jiffies + delay; + + add_timer(&ma->mca_timer); + ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER; + end_bh_atomic(); +} + +static void igmp6_leave_group(struct ifmcaddr6 *ma) +{ + int addr_type; + + addr_type = ipv6_addr_type(&ma->mca_addr); + + if ((addr_type & IPV6_ADDR_LINKLOCAL)) + return; + + start_bh_atomic(); + if (ma->mca_flags & MAF_LAST_REPORTER) + igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REDUCTION); + + if (ma->mca_flags & MAF_TIMER_RUNNING) + del_timer(&ma->mca_timer); + end_bh_atomic(); +} + +void igmp6_timer_handler(unsigned long data) +{ + struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; + + ma->mca_flags |= MAF_LAST_REPORTER; + igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT); + ma->mca_flags &= ~MAF_TIMER_RUNNING; +} + +/* Device going down */ + +void ipv6_mc_down(struct inet6_dev *idev) +{ + struct ifmcaddr6 *i; + struct in6_addr maddr; + + /* Withdraw multicast list */ + + for (i = idev->mc_list; i; i=i->if_next) + igmp6_group_dropped(i); + + /* Delete all-nodes address. */ + + ipv6_addr_all_nodes(&maddr); + ipv6_dev_mc_dec(idev->dev, &maddr); +} + +/* Device going up */ + +void ipv6_mc_up(struct inet6_dev *idev) +{ + struct ifmcaddr6 *i; + struct in6_addr maddr; + + /* Add all-nodes address. */ + + ipv6_addr_all_nodes(&maddr); + ipv6_dev_mc_inc(idev->dev, &maddr); + + /* Install multicast list, except for all-nodes (already installed) */ + + for (i = idev->mc_list; i; i=i->if_next) + igmp6_group_added(i); +} + +/* + * Device is about to be destroyed: clean up. 
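 */

The MAF_LOADED path above pushes a link-layer filter entry via ndisc_mc_map()/dev_mc_add(). For Ethernet-class devices that mapping is fixed by RFC 2464: the group MAC is 33:33 followed by the low 32 bits of the IPv6 group address. A stand-alone sketch of that rule (hypothetical helper, for illustration only):

#include <stdint.h>

static void eth_map_ipv6_mcast(const uint8_t group[16], uint8_t mac[6])
{
	mac[0] = 0x33;		/* fixed IPv6 multicast MAC prefix */
	mac[1] = 0x33;
	mac[2] = group[12];	/* low-order 32 bits of the group address */
	mac[3] = group[13];
	mac[4] = group[14];
	mac[5] = group[15];
}

/*
 * Teardown of the per-device group list when the device goes away: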
+ */ + +void ipv6_mc_destroy_dev(struct inet6_dev *idev) +{ + int hash; + struct ifmcaddr6 *i, **lnk; + + while ((i = idev->mc_list) != NULL) { + idev->mc_list = i->if_next; + + hash = ipv6_addr_hash(&i->mca_addr); + + for (lnk = &inet6_mcast_lst[hash]; *lnk; lnk = &(*lnk)->next) { + if (*lnk == i) { + *lnk = i->next; + synchronize_bh(); + break; + } + } + igmp6_group_dropped(i); + kfree(i); + } +} + +#ifdef CONFIG_PROC_FS +static int igmp6_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0, begin=0; + struct ifmcaddr6 *im; + int len=0; + struct device *dev; + + for (dev = dev_base; dev; dev = dev->next) { + struct inet6_dev *idev; + + if ((idev = ipv6_get_idev(dev)) == NULL) + continue; + + for (im = idev->mc_list; im; im = im->if_next) { + int i; + + len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name); + + for (i=0; i<16; i++) + len += sprintf(buffer+len, "%02x", im->mca_addr.s6_addr[i]); + + len+=sprintf(buffer+len, + " %5d %08X %ld\n", + atomic_read(&im->mca_users), + im->mca_flags, + (im->mca_flags&MAF_TIMER_RUNNING) ? im->mca_timer.expires-jiffies : 0); + + pos=begin+len; + if (pos < offset) { + len=0; + begin=pos; + } + if (pos > offset+length) + goto done; + } + } + *eof = 1; + +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if (len<0) + len=0; + return len; +} +#endif + +__initfunc(int igmp6_init(struct net_proto_family *ops)) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; +#endif + struct sock *sk; + int err; + + igmp6_socket = sock_alloc(); + if (igmp6_socket == NULL) { + printk(KERN_ERR + "Failed to create the IGMP6 control socket.\n"); + return -1; + } + igmp6_socket->inode->i_uid = 0; + igmp6_socket->inode->i_gid = 0; + igmp6_socket->type = SOCK_RAW; + + if((err = ops->create(igmp6_socket, IPPROTO_ICMPV6)) < 0) { + printk(KERN_DEBUG + "Failed to initialize the IGMP6 control socket (err %d).\n", + err); + sock_release(igmp6_socket); + igmp6_socket = NULL; /* For safety. */ + return err; + } + + sk = igmp6_socket->sk; + sk->allocation = GFP_ATOMIC; + sk->num = 256; /* Don't receive any data */ + + sk->net_pinfo.af_inet6.hop_limit = 1; +#ifdef CONFIG_PROC_FS + ent = create_proc_entry("net/igmp6", 0, 0); + ent->read_proc = igmp6_read_proc; +#endif + + return 0; +} + +#ifdef MODULE +void igmp6_cleanup(void) +{ + sock_release(igmp6_socket); + igmp6_socket = NULL; /* for safety */ +#ifdef CONFIG_PROC_FS + remove_proc_entry("net/igmp6", 0); +#endif +} +#endif diff --git a/pfinet/linux-src/net/ipv6/ndisc.c b/pfinet/linux-src/net/ipv6/ndisc.c new file mode 100644 index 00000000..bb5e0837 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/ndisc.c @@ -0,0 +1,1215 @@ +/* + * Neighbour Discovery for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * Mike Shaver <shaver@ingenia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Changes: + * + * Lars Fenneberg : fixed MTU setting on receipt + * of an RA. + * + * Janos Farkas : kmalloc failure checks + * Alexey Kuznetsov : state machine reworked + * and moved to net/core. + */ + +/* Set to 3 to get tracing... */ +#define ND_DEBUG 1 + +#define ND_PRINTK(x...) printk(KERN_DEBUG x) +#define ND_NOPRINTK(x...) 
do { ; } while(0) +#define ND_PRINTK0 ND_PRINTK +#define ND_PRINTK1 ND_NOPRINTK +#define ND_PRINTK2 ND_NOPRINTK +#if ND_DEBUG >= 1 +#undef ND_PRINTK1 +#define ND_PRINTK1 ND_PRINTK +#endif +#if ND_DEBUG >= 2 +#undef ND_PRINTK2 +#define ND_PRINTK2 ND_PRINTK +#endif + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/route.h> +#include <linux/init.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#include <linux/if_arp.h> +#include <linux/ipv6.h> +#include <linux/icmpv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/icmp.h> + +#include <net/checksum.h> +#include <linux/proc_fs.h> + +static struct socket *ndisc_socket; + +static int ndisc_constructor(struct neighbour *neigh); +static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); +static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); +static int pndisc_constructor(struct pneigh_entry *n); +static void pndisc_destructor(struct pneigh_entry *n); +static void pndisc_redo(struct sk_buff *skb); + +static struct neigh_ops ndisc_generic_ops = +{ + AF_INET6, + NULL, + ndisc_solicit, + ndisc_error_report, + neigh_resolve_output, + neigh_connected_output, + dev_queue_xmit, + dev_queue_xmit +}; + +static struct neigh_ops ndisc_hh_ops = +{ + AF_INET6, + NULL, + ndisc_solicit, + ndisc_error_report, + neigh_resolve_output, + neigh_resolve_output, + dev_queue_xmit, + dev_queue_xmit +}; + + +static struct neigh_ops ndisc_direct_ops = +{ + AF_INET6, + NULL, + NULL, + NULL, + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit, + dev_queue_xmit +}; + +struct neigh_table nd_tbl = +{ + NULL, + AF_INET6, + sizeof(struct neighbour) + sizeof(struct in6_addr), + sizeof(struct in6_addr), + ndisc_constructor, + pndisc_constructor, + pndisc_destructor, + pndisc_redo, + { NULL, NULL, &nd_tbl, 0, NULL, NULL, + 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 0 }, + 30*HZ, 128, 512, 1024, +}; + +#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) + +static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len) +{ + int space = NDISC_OPT_SPACE(data_len); + + opt[0] = type; + opt[1] = space>>3; + memcpy(opt+2, data, data_len); + data_len += 2; + opt += data_len; + if ((space -= data_len) > 0) + memset(opt, 0, space); + return opt + space; +} + +int ndisc_mc_map(struct in6_addr *addr, char *buf, struct device *dev, int dir) +{ + switch (dev->type) { + case ARPHRD_ETHER: + case ARPHRD_IEEE802: /* Not sure. Check it later. 
--ANK */ + case ARPHRD_FDDI: + ipv6_eth_mc_map(addr, buf); + return 0; + default: + if (dir) { + memcpy(buf, dev->broadcast, dev->addr_len); + return 0; + } + } + return -EINVAL; +} + +static int ndisc_constructor(struct neighbour *neigh) +{ + struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; + struct device *dev = neigh->dev; + struct inet6_dev *in6_dev = ipv6_get_idev(dev); + int addr_type; + + if (in6_dev == NULL) + return -EINVAL; + + addr_type = ipv6_addr_type(addr); + if (in6_dev->nd_parms) + neigh->parms = in6_dev->nd_parms; + + if (addr_type&IPV6_ADDR_MULTICAST) + neigh->type = RTN_MULTICAST; + else + neigh->type = RTN_UNICAST; + if (dev->hard_header == NULL) { + neigh->nud_state = NUD_NOARP; + neigh->ops = &ndisc_direct_ops; + neigh->output = neigh->ops->queue_xmit; + } else { + if (addr_type&IPV6_ADDR_MULTICAST) { + neigh->nud_state = NUD_NOARP; + ndisc_mc_map(addr, neigh->ha, dev, 1); + } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->dev_addr, dev->addr_len); + if (dev->flags&IFF_LOOPBACK) + neigh->type = RTN_LOCAL; + } else if (dev->flags&IFF_POINTOPOINT) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->broadcast, dev->addr_len); + } + if (dev->hard_header_cache) + neigh->ops = &ndisc_hh_ops; + else + neigh->ops = &ndisc_generic_ops; + if (neigh->nud_state&NUD_VALID) + neigh->output = neigh->ops->connected_output; + else + neigh->output = neigh->ops->output; + } + + return 0; +} + +static int pndisc_constructor(struct pneigh_entry *n) +{ + struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr maddr; + struct device *dev = n->dev; + + if (dev == NULL || ipv6_get_idev(dev) == NULL) + return -EINVAL; +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); +#endif +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); +#endif + return 0; +} + +static void pndisc_destructor(struct pneigh_entry *n) +{ + struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr maddr; + struct device *dev = n->dev; + + if (dev == NULL || ipv6_get_idev(dev) == NULL) + return; +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(addr, &maddr); + ipv6_dev_mc_dec(dev, &maddr); +#endif +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(addr, &maddr); + ipv6_dev_mc_dec(dev, &maddr); +#endif +} + + + +static int +ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev, + struct in6_addr *daddr, struct neighbour *neigh, int len) +{ + unsigned char ha[MAX_ADDR_LEN]; + unsigned char *h_dest = NULL; + + skb_reserve(skb, (dev->hard_header_len + 15) & ~15); + + if (dev->hard_header) { + if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) { + ndisc_mc_map(daddr, ha, dev, 1); + h_dest = ha; + } else if (neigh) { + h_dest = neigh->ha; + } else { + neigh = neigh_lookup(&nd_tbl, daddr, dev); + if (neigh) { + if (neigh->nud_state&NUD_VALID) { + memcpy(ha, neigh->ha, dev->addr_len); + h_dest = ha; + } + neigh_release(neigh); + } + } + + if (dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, len) < 0) + return 0; + } + + return 1; +} + + +/* + * Send a Neighbour Advertisement + */ + +void ndisc_send_na(struct device *dev, struct neighbour *neigh, + struct in6_addr *daddr, struct in6_addr *solicited_addr, + int router, int solicited, int override, int inc_opt) +{ + struct sock *sk = ndisc_socket->sk; + struct nd_msg *msg; + int len; + struct sk_buff *skb; + int err; + + len = sizeof(struct icmp6hdr) + 
sizeof(struct in6_addr); + + if (inc_opt) { + if (dev->addr_len) + len += NDISC_OPT_SPACE(dev->addr_len); + else + inc_opt = 0; + } + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15, + 0, 0, &err); + + if (skb == NULL) { + ND_PRINTK1("send_na: alloc skb failed\n"); + return; + } + + if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) { + kfree_skb(skb); + return; + } + + ip6_nd_hdr(sk, skb, dev, solicited_addr, daddr, IPPROTO_ICMPV6, len); + + msg = (struct nd_msg *) skb_put(skb, len); + + msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + msg->icmph.icmp6_code = 0; + msg->icmph.icmp6_cksum = 0; + + msg->icmph.icmp6_unused = 0; + msg->icmph.icmp6_router = router; + msg->icmph.icmp6_solicited = solicited; + msg->icmph.icmp6_override = !!override; + + /* Set the target address. */ + ipv6_addr_copy(&msg->target, solicited_addr); + + if (inc_opt) + ndisc_fill_option((void*)&msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len); + + /* checksum */ + msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) msg, + len, 0)); + + dev_queue_xmit(skb); + + icmpv6_statistics.Icmp6OutNeighborAdvertisements++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +void ndisc_send_ns(struct device *dev, struct neighbour *neigh, + struct in6_addr *solicit, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct sock *sk = ndisc_socket->sk; + struct sk_buff *skb; + struct nd_msg *msg; + int len; + int err; + + len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + if (dev->addr_len) + len += NDISC_OPT_SPACE(dev->addr_len); + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15, + 0, 0, &err); + if (skb == NULL) { + ND_PRINTK1("send_ns: alloc skb failed\n"); + return; + } + + if (saddr == NULL) { + struct inet6_ifaddr *ifa; + + /* use link local address */ + ifa = ipv6_get_lladdr(dev); + + if (ifa) + saddr = &ifa->addr; + } + + if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) { + kfree_skb(skb); + return; + } + + ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + + msg = (struct nd_msg *)skb_put(skb, len); + msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION; + msg->icmph.icmp6_code = 0; + msg->icmph.icmp6_cksum = 0; + msg->icmph.icmp6_unused = 0; + + /* Set the target address. */ + ipv6_addr_copy(&msg->target, solicit); + + if (dev->addr_len) + ndisc_fill_option((void*)&msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len); + + /* checksum */ + msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, + daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) msg, + len, 0)); + /* send it! 
*/ + dev_queue_xmit(skb); + + icmpv6_statistics.Icmp6OutNeighborSolicits++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +void ndisc_send_rs(struct device *dev, struct in6_addr *saddr, + struct in6_addr *daddr) +{ + struct sock *sk = ndisc_socket->sk; + struct sk_buff *skb; + struct icmp6hdr *hdr; + __u8 * opt; + int len; + int err; + + len = sizeof(struct icmp6hdr); + if (dev->addr_len) + len += NDISC_OPT_SPACE(dev->addr_len); + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15, + 0, 0, &err); + if (skb == NULL) { + ND_PRINTK1("send_rs: alloc skb failed\n"); + return; + } + + if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) { + kfree_skb(skb); + return; + } + + ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + + hdr = (struct icmp6hdr *) skb_put(skb, len); + hdr->icmp6_type = NDISC_ROUTER_SOLICITATION; + hdr->icmp6_code = 0; + hdr->icmp6_cksum = 0; + hdr->icmp6_unused = 0; + + opt = (u8*) (hdr + 1); + + if (dev->addr_len) + ndisc_fill_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len); + + /* checksum */ + hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) hdr, len, 0)); + + /* send it! */ + dev_queue_xmit(skb); + + icmpv6_statistics.Icmp6OutRouterSolicits++; + icmpv6_statistics.Icmp6OutMsgs++; +} + + +static u8 * ndisc_find_option(u8 *opt, int opt_len, int len, int option) +{ + while (opt_len <= len) { + int l = opt[1]<<3; + + if (opt[0] == option && l >= opt_len) + return opt + 2; + + if (l == 0) { + if (net_ratelimit()) + printk(KERN_WARNING "ndisc: option has 0 len\n"); + return NULL; + } + + opt += l; + len -= l; + } + return NULL; +} + + +static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) +{ + /* + * "The sender MUST return an ICMP + * destination unreachable" + */ + dst_link_failure(skb); + kfree_skb(skb); +} + +static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) +{ + struct in6_addr *saddr = NULL; + struct in6_addr mcaddr; + struct device *dev = neigh->dev; + struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; + int probes = neigh->probes; + + if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 0)) + saddr = &skb->nh.ipv6h->saddr; + + if ((probes -= neigh->parms->ucast_probes) < 0) { + if (!(neigh->nud_state&NUD_VALID)) + ND_PRINTK1("trying to ucast probe in NUD_INVALID\n"); + ndisc_send_ns(dev, neigh, target, target, saddr); + } else if ((probes -= neigh->parms->app_probes) < 0) { +#ifdef CONFIG_ARPD + neigh_app_ns(neigh); +#endif + } else { +#ifdef CONFIG_IPV6_EUI64 + addrconf_addr_solict_mult_new(target, &mcaddr); + ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); +#endif +#ifndef CONFIG_IPV6_NO_PB + addrconf_addr_solict_mult_old(target, &mcaddr); + ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); +#endif + } +} + + +static void ndisc_update(struct neighbour *neigh, u8* opt, int len, int type) +{ + opt = ndisc_find_option(opt, neigh->dev->addr_len+2, len, type); + neigh_update(neigh, opt, NUD_STALE, 1, 1); +} + +static void ndisc_router_discovery(struct sk_buff *skb) +{ + struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw; + struct neighbour *neigh; + struct inet6_dev *in6_dev; + struct rt6_info *rt; + int lifetime; + int optlen; + + __u8 * opt = (__u8 *)(ra_msg + 1); + + optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg); + + if (skb->nh.ipv6h->hop_limit != 255) { + printk(KERN_INFO + "NDISC: fake router advertisement received\n"); + return; + } + + /* + * set the RA_RECV flag in the
interface + */ + + in6_dev = ipv6_get_idev(skb->dev); + if (in6_dev == NULL) { + ND_PRINTK1("RA: can't find in6 device\n"); + return; + } + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) + return; + + if (in6_dev->if_flags & IF_RS_SENT) { + /* + * flag that an RA was received after an RS was sent + * out on this interface. + */ + in6_dev->if_flags |= IF_RA_RCVD; + } + + lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); + + rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); + + if (rt && lifetime == 0) { + ip6_del_rt(rt); + dst_release(&rt->u.dst); + rt = NULL; + } + + if (rt == NULL && lifetime) { + ND_PRINTK2("ndisc_rdisc: adding default router\n"); + + rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); + if (rt == NULL) { + ND_PRINTK1("route_add failed\n"); + return; + } + + neigh = rt->rt6i_nexthop; + if (neigh == NULL) { + ND_PRINTK1("nd: add default router: null neighbour\n"); + dst_release(&rt->u.dst); + return; + } + neigh->flags |= NTF_ROUTER; + + /* + * If we were using an "all destinations on link" route + * delete it + */ + + rt6_purge_dflt_routers(RTF_ALLONLINK); + } + + if (rt) + rt->rt6i_expires = jiffies + (HZ * lifetime); + + if (ra_msg->icmph.icmp6_hop_limit) + in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + + /* + * Update Reachable Time and Retrans Timer + */ + + if (in6_dev->nd_parms) { + if (ra_msg->retrans_timer) + in6_dev->nd_parms->retrans_time = (ntohl(ra_msg->retrans_timer)*HZ)/1000; + + if (ra_msg->reachable_time) { + __u32 rtime = (ntohl(ra_msg->reachable_time)*HZ)/1000; + + if (rtime != in6_dev->nd_parms->base_reachable_time) { + in6_dev->nd_parms->base_reachable_time = rtime; + in6_dev->nd_parms->gc_staletime = 3 * rtime; + in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); + } + } + } + + /* + * Process options.
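 */

Each ND option parsed below is a TLV whose length byte counts units of 8 octets; a zero length is malformed and must terminate parsing, or the loop would never advance. A stand-alone sketch of the walk (hypothetical helper, for illustration only):

#include <stddef.h>

/* Advance to the next ND option; returns NULL on a malformed length. */
static const unsigned char *nd_opt_next(const unsigned char *opt, size_t *remaining)
{
	size_t len = (size_t)opt[1] << 3;	/* length is in 8-octet units */

	if (len == 0 || len > *remaining)
		return NULL;
	*remaining -= len;
	return opt + len;
}

/*
 * The dispatch over the option types found in the RA: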
+ */ + + while (optlen > 0) { + int len = (opt[1] << 3); + + if (len == 0) { + ND_PRINTK0("RA: opt has 0 len\n"); + break; + } + + switch(*opt) { + case ND_OPT_SOURCE_LL_ADDR: + + if (rt == NULL) + break; + + if ((neigh = rt->rt6i_nexthop) != NULL && + skb->dev->addr_len + 2 >= len) + neigh_update(neigh, opt+2, NUD_STALE, 1, 1); + break; + + case ND_OPT_PREFIX_INFO: + addrconf_prefix_rcv(skb->dev, opt, len); + break; + + case ND_OPT_MTU: + { + int mtu; + + mtu = ntohl(*(__u32 *)(opt+4)); + + if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { + ND_PRINTK0("NDISC: router " + "announcement with mtu = %d\n", + mtu); + break; + } + + if (in6_dev->cnf.mtu6 != mtu) { + in6_dev->cnf.mtu6 = mtu; + + if (rt) + rt->u.dst.pmtu = mtu; + + rt6_mtu_change(skb->dev, mtu); + } + } + break; + + case ND_OPT_TARGET_LL_ADDR: + case ND_OPT_REDIRECT_HDR: + ND_PRINTK0("got illegal option with RA\n"); + break; + default: + ND_PRINTK0("unknown option in RA\n"); + }; + optlen -= len; + opt += len; + } + if (rt) + dst_release(&rt->u.dst); +} + +static void ndisc_redirect_rcv(struct sk_buff *skb) +{ + struct inet6_dev *in6_dev; + struct icmp6hdr *icmph; + struct in6_addr *dest; + struct in6_addr *target; /* new first hop to destination */ + struct neighbour *neigh; + int on_link = 0; + int optlen; + + if (skb->nh.ipv6h->hop_limit != 255) { + printk(KERN_WARNING "NDISC: fake ICMP redirect received\n"); + return; + } + + if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + printk(KERN_WARNING "ICMP redirect: source address is not linklocal\n"); + return; + } + + optlen = skb->tail - skb->h.raw; + optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + + if (optlen < 0) { + printk(KERN_WARNING "ICMP redirect: packet too small\n"); + return; + } + + icmph = (struct icmp6hdr *) skb->h.raw; + target = (struct in6_addr *) (icmph + 1); + dest = target + 1; + + if (ipv6_addr_type(dest) & IPV6_ADDR_MULTICAST) { + printk(KERN_WARNING "ICMP redirect for multicast addr\n"); + return; + } + + if (ipv6_addr_cmp(dest, target) == 0) { + on_link = 1; + } else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { + printk(KERN_WARNING "ICMP redirect: target address is not linklocal\n"); + return; + } + + in6_dev = ipv6_get_idev(skb->dev); + if (!in6_dev || in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + return; + + /* passed validation tests */ + + /* + We install redirect only if nexthop state is valid.
+ */ + + neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + if (neigh) { + ndisc_update(neigh, (u8*)(dest + 1), optlen, ND_OPT_TARGET_LL_ADDR); + if (neigh->nud_state&NUD_VALID) + rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, on_link); + else + __neigh_event_send(neigh, NULL); + neigh_release(neigh); + } +} + +void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, + struct in6_addr *target) +{ + struct sock *sk = ndisc_socket->sk; + int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + struct sk_buff *buff; + struct inet6_ifaddr *ifp; + struct icmp6hdr *icmph; + struct in6_addr *addrp; + struct device *dev; + struct rt6_info *rt; + u8 *opt; + int rd_len; + int err; + int hlen; + + dev = skb->dev; + rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1); + + if (rt == NULL) + return; + + if (rt->rt6i_flags & RTF_GATEWAY) { + ND_PRINTK1("ndisc_send_redirect: not a neighbour\n"); + dst_release(&rt->u.dst); + return; + } + if (!xrlim_allow(&rt->u.dst, 1*HZ)) { + dst_release(&rt->u.dst); + return; + } + dst_release(&rt->u.dst); + + if (dev->addr_len) { + if (neigh->nud_state&NUD_VALID) { + len += NDISC_OPT_SPACE(dev->addr_len); + } else { + /* If nexthop is not valid, do not redirect! + We will make it later, when will be sure, + that it is alive. + */ + return; + } + } + + rd_len = min(IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8); + rd_len &= ~0x7; + len += rd_len; + + ifp = ipv6_get_lladdr(dev); + + if (ifp == NULL) { + ND_PRINTK1("redirect: no link_local addr for dev\n"); + return; + } + + buff = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15, + 0, 0, &err); + if (buff == NULL) { + ND_PRINTK1("ndisc_send_redirect: alloc_skb failed\n"); + return; + } + + hlen = 0; + + if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) { + kfree_skb(buff); + return; + } + + ip6_nd_hdr(sk, buff, dev, &ifp->addr, &skb->nh.ipv6h->saddr, + IPPROTO_ICMPV6, len); + + icmph = (struct icmp6hdr *) skb_put(buff, len); + + memset(icmph, 0, sizeof(struct icmp6hdr)); + icmph->icmp6_type = NDISC_REDIRECT; + + /* + * copy target and destination addresses + */ + + addrp = (struct in6_addr *)(icmph + 1); + ipv6_addr_copy(addrp, target); + addrp++; + ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr); + + opt = (u8*) (addrp + 1); + + /* + * include target_address option + */ + + if (dev->addr_len) + opt = ndisc_fill_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, dev->addr_len); + + /* + * build redirect option and copy skb over to the new packet. 
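 */

The option assembled here is the Redirected Header option of RFC 2461 section 4.6.3: type 4, a length counted in units of 8 octets, six reserved bytes, then as much of the offending packet as fits below the minimum IPv6 MTU. A layout sketch (illustrative struct, not from this commit):

#include <stdint.h>

struct nd_opt_rd_hdr {
	uint8_t  type;		/* ND_OPT_REDIRECT_HDR (4) */
	uint8_t  length;	/* whole option length >> 3 */
	uint8_t  reserved[6];	/* must be zero */
	/* followed by (length << 3) - 8 bytes of the redirected packet */
};

/*
 * Write the option header, then copy the original packet in: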
+ */ + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, skb->nh.ipv6h, rd_len - 8); + + icmph->icmp6_cksum = csum_ipv6_magic(&ifp->addr, &skb->nh.ipv6h->saddr, + len, IPPROTO_ICMPV6, + csum_partial((u8 *) icmph, len, 0)); + + dev_queue_xmit(buff); + + icmpv6_statistics.Icmp6OutRedirects++; + icmpv6_statistics.Icmp6OutMsgs++; +} + +static __inline__ struct neighbour * +ndisc_recv_ns(struct in6_addr *saddr, struct sk_buff *skb) +{ + u8 *opt; + + opt = skb->h.raw; + opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_SOURCE_LL_ADDR); + + return neigh_event_ns(&nd_tbl, opt, saddr, skb->dev); +} + +static __inline__ int ndisc_recv_na(struct neighbour *neigh, struct sk_buff *skb) +{ + struct nd_msg *msg = (struct nd_msg *) skb->h.raw; + u8 *opt; + + opt = skb->h.raw; + opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_TARGET_LL_ADDR); + + return neigh_update(neigh, opt, + msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, + msg->icmph.icmp6_override, 1); +} + +static void pndisc_redo(struct sk_buff *skb) +{ + ndisc_rcv(skb, skb->len); + kfree_skb(skb); +} + +int ndisc_rcv(struct sk_buff *skb, unsigned long len) +{ + struct device *dev = skb->dev; + struct in6_addr *saddr = &skb->nh.ipv6h->saddr; + struct in6_addr *daddr = &skb->nh.ipv6h->daddr; + struct nd_msg *msg = (struct nd_msg *) skb->h.raw; + struct neighbour *neigh; + struct inet6_ifaddr *ifp; + + switch (msg->icmph.icmp6_type) { + case NDISC_NEIGHBOUR_SOLICITATION: + if ((ifp = ipv6_chk_addr(&msg->target, dev, 1)) != NULL) { + int addr_type = ipv6_addr_type(saddr); + + if (ifp->flags & ADDR_INVALID) + return 0; + if (ifp->flags & DAD_INCOMPLETE) { + /* Address is tentative. If the source + is the unspecified address, someone is + doing DAD; otherwise we ignore solicitations + until the DAD timer expires. + */ + if (addr_type == IPV6_ADDR_ANY) + addrconf_dad_failure(ifp); + return 0; + } + + if (addr_type == IPV6_ADDR_ANY) { + struct in6_addr maddr; + + ipv6_addr_all_nodes(&maddr); + ndisc_send_na(dev, NULL, &maddr, &ifp->addr, + ifp->idev->cnf.forwarding, 0, 1, 1); + return 0; + } + + if (addr_type & IPV6_ADDR_UNICAST) { + int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST; + + if (inc) + nd_tbl.stats.rcv_probes_mcast++; + else + nd_tbl.stats.rcv_probes_ucast++; + + /* + * update / create cache entry + * for the source address + */ + + neigh = ndisc_recv_ns(saddr, skb); + + if (neigh) { + ndisc_send_na(dev, neigh, saddr, &ifp->addr, + ifp->idev->cnf.forwarding, 1, inc, inc); + neigh_release(neigh); + } + } + } else { + struct inet6_dev *in6_dev = ipv6_get_idev(dev); + int addr_type = ipv6_addr_type(saddr); + + if (in6_dev && in6_dev->cnf.forwarding && + (addr_type & IPV6_ADDR_UNICAST) && + pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST; + + if (skb->stamp.tv_sec == 0 || + skb->pkt_type == PACKET_HOST || + inc == 0 || + in6_dev->nd_parms->proxy_delay == 0) { + if (inc) + nd_tbl.stats.rcv_probes_mcast++; + else + nd_tbl.stats.rcv_probes_ucast++; + + neigh = ndisc_recv_ns(saddr, skb); + + if (neigh) { + ndisc_send_na(dev, neigh, saddr, &msg->target, + 0, 1, 0, inc); + neigh_release(neigh); + } + } else { + /* Hack.
It will be freed upon exit from + ndisc_rcv + */ + atomic_inc(&skb->users); + pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, skb); + return 0; + } + } + } + return 0; + + case NDISC_NEIGHBOUR_ADVERTISEMENT: + if ((ipv6_addr_type(saddr)&IPV6_ADDR_MULTICAST) && + msg->icmph.icmp6_solicited) { + ND_PRINTK0("NDISC: solicited NA is multicasted\n"); + return 0; + } + /* BUG! Target can be link-local on ANOTHER interface. Fixed. */ + if ((ifp = ipv6_chk_addr(&msg->target, dev, 1))) { + if (ifp->flags & ADDR_INVALID) + return 0; + if (ifp->flags & DAD_INCOMPLETE) { + addrconf_dad_failure(ifp); + return 0; + } + /* What should we do now? The advertisement + is invalid, but the ndisc specs say nothing + about it. It could be misconfiguration, or + a smart proxy agent trying to help us :-) + */ + ND_PRINTK0("%s: someone advertises our address!\n", + ifp->idev->dev->name); + return 0; + } + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 0); + + if (neigh) { + if (neigh->flags & NTF_ROUTER) { + if (msg->icmph.icmp6_router == 0) { + /* + * Change: router to host + */ + struct rt6_info *rt; + rt = rt6_get_dflt_router(saddr, skb->dev); + if (rt) { + /* It is safe only because + we are in BH */ + dst_release(&rt->u.dst); + ip6_del_rt(rt); + } + } + } else { + if (msg->icmph.icmp6_router) + neigh->flags |= NTF_ROUTER; + } + + ndisc_recv_na(neigh, skb); + neigh_release(neigh); + } + break; + + case NDISC_ROUTER_ADVERTISEMENT: + ndisc_router_discovery(skb); + break; + + case NDISC_REDIRECT: + ndisc_redirect_rcv(skb); + break; + }; + + return 0; +} + +#ifdef CONFIG_PROC_FS +#ifndef CONFIG_RTNETLINK +int ndisc_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t pos=0; + int size; + unsigned long now = jiffies; + int i; + + neigh_table_lock(&nd_tbl); + + for (i = 0; i <= NEIGH_HASHMASK; i++) { + struct neighbour *neigh; + + for (neigh = nd_tbl.hash_buckets[i]; neigh; neigh = neigh->next) { + int j; + + size = 0; + for (j=0; j<16; j++) { + sprintf(buffer+len+size, "%02x", neigh->primary_key[j]); + size += 2; + } + + size += sprintf(buffer+len+size, + " %02x %02x %02x %02x %08lx %08lx %08x %04x %04x %04x %8s ", i, + 128, + neigh->type, + neigh->nud_state, + now - neigh->used, + now - neigh->confirmed, + neigh->parms->reachable_time, + neigh->parms->gc_staletime, + atomic_read(&neigh->refcnt), + neigh->flags | (!neigh->hh ? 0 : (neigh->hh->hh_output==dev_queue_xmit ?
4 : 2)), + neigh->dev->name); + + if ((neigh->nud_state&NUD_VALID) && neigh->dev->addr_len) { + for (j=0; j < neigh->dev->addr_len; j++) { + sprintf(buffer+len+size, "%02x", neigh->ha[j]); + size += 2; + } + } else { + size += sprintf(buffer+len+size, "000000000000"); + } + size += sprintf(buffer+len+size, "\n"); + len += size; + pos += size; + + if (pos <= offset) + len=0; + if (pos >= offset+length) + goto done; + } + } + +done: + neigh_table_unlock(&nd_tbl); + + *start = buffer+len-(pos-offset); /* Start of wanted data */ + len = pos-offset; /* Start slop */ + if (len>length) + len = length; /* Ending slop */ + if (len<0) + len = 0; + return len; +} + +struct proc_dir_entry ndisc_proc_entry = +{ + PROC_NET_NDISC, 5, "ndisc", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, NULL, + &ndisc_get_info +}; +#endif +#endif /* CONFIG_PROC_FS */ + + + +__initfunc(int ndisc_init(struct net_proto_family *ops)) +{ + struct sock *sk; + int err; + + ndisc_socket = sock_alloc(); + if (ndisc_socket == NULL) { + printk(KERN_ERR + "Failed to create the NDISC control socket.\n"); + return -1; + } + ndisc_socket->inode->i_uid = 0; + ndisc_socket->inode->i_gid = 0; + ndisc_socket->type = SOCK_RAW; + + if((err = ops->create(ndisc_socket, IPPROTO_ICMPV6)) < 0) { + printk(KERN_DEBUG + "Failed to initialize the NDISC control socket (err %d).\n", + err); + sock_release(ndisc_socket); + ndisc_socket = NULL; /* For safety. */ + return err; + } + + sk = ndisc_socket->sk; + sk->allocation = GFP_ATOMIC; + sk->net_pinfo.af_inet6.hop_limit = 255; + /* Do not loopback ndisc messages */ + sk->net_pinfo.af_inet6.mc_loop = 0; + sk->num = 256; + + /* + * Initialize the neighbour table + */ + + neigh_table_init(&nd_tbl); + +#ifdef CONFIG_PROC_FS +#ifndef CONFIG_RTNETLINK + proc_net_register(&ndisc_proc_entry); +#endif +#endif +#ifdef CONFIG_SYSCTL + neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6"); +#endif + + return 0; +} + +void ndisc_cleanup(void) +{ +#ifdef CONFIG_PROC_FS +#ifndef CONFIG_RTNETLINK + proc_net_unregister(ndisc_proc_entry.low_ino); +#endif +#endif + neigh_table_clear(&nd_tbl); + sock_release(ndisc_socket); + ndisc_socket = NULL; /* For safety. */ +} diff --git a/pfinet/linux-src/net/ipv6/protocol_ipv6.c b/pfinet/linux-src/net/ipv6/protocol_ipv6.c new file mode 100644 index 00000000..ad871914 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/protocol_ipv6.c @@ -0,0 +1,117 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * PF_INET6 protocol dispatch tables. + * + * Version: $Id: protocol_ipv6.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * Authors: Pedro Roque <roque@di.fc.ul.pt> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> + +struct inet6_protocol *inet6_protocol_base = NULL; +struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] = +{ + NULL +}; + + +struct inet6_protocol *inet6_get_protocol(unsigned char prot) +{ + unsigned char hash; + struct inet6_protocol *p; + + hash = prot & (MAX_INET_PROTOS - 1); + for (p = inet6_protos[hash] ; p != NULL; p=p->next) { + if (p->protocol == prot) + return((struct inet6_protocol *) p); + } + return(NULL); +} + +void inet6_add_protocol(struct inet6_protocol *prot) +{ + unsigned char hash; + struct inet6_protocol *p2; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + prot->next = inet6_protos[hash]; + inet6_protos[hash] = prot; + prot->copy = 0; + + /* + * Set the copy bit if we need to. + */ + + p2 = (struct inet6_protocol *) prot->next; + while(p2 != NULL) { + if (p2->protocol == prot->protocol) { + prot->copy = 1; + break; + } + p2 = (struct inet6_protocol *) p2->next; + } +} + +/* + * Remove a protocol from the hash tables. + */ + +int inet6_del_protocol(struct inet6_protocol *prot) +{ + struct inet6_protocol *p; + struct inet6_protocol *lp = NULL; + unsigned char hash; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + if (prot == inet6_protos[hash]) { + inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next; + return(0); + } + + p = (struct inet6_protocol *) inet6_protos[hash]; + while(p != NULL) { + /* + * We have to worry if the protocol being deleted is + * the last one on the list, then we may need to reset + * someone's copied bit. + */ + if (p->next != NULL && p->next == prot) { + /* + * if we are the last one with this protocol and + * there is a previous one, reset its copy bit. + */ + if (p->copy == 0 && lp != NULL) + lp->copy = 0; + p->next = prot->next; + return(0); + } + if (p->next != NULL && p->next->protocol == prot->protocol) + lp = p; + + p = (struct inet6_protocol *) p->next; + } + return(-1); +} diff --git a/pfinet/linux-src/net/ipv6/raw_ipv6.c b/pfinet/linux-src/net/ipv6/raw_ipv6.c new file mode 100644 index 00000000..95856ea7 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/raw_ipv6.c @@ -0,0 +1,644 @@ +/* + * RAW sockets for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * Adapted from linux/net/ipv4/raw.c + * + * $Id: raw_ipv6.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <asm/uaccess.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/protocol.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/transp_v6.h> + +#include <net/rawv6.h> + +#include <asm/uaccess.h> + +struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE]; + +static void raw_v6_hash(struct sock *sk) +{ + struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)]; + + SOCKHASH_LOCK(); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + +static void raw_v6_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + } + SOCKHASH_UNLOCK(); +} + +static __inline__ int inet6_mc_check(struct sock *sk, struct in6_addr *addr) +{ + struct ipv6_mc_socklist *mc; + + for (mc = sk->net_pinfo.af_inet6.ipv6_mc_list; mc; mc=mc->next) { + if (ipv6_addr_cmp(&mc->addr, addr) == 0) + return 1; + } + + return 0; +} + +/* Grumble... icmp and ip_input want to get at this... */ +struct sock *raw_v6_lookup(struct sock *sk, unsigned short num, + struct in6_addr *loc_addr, struct in6_addr *rmt_addr) +{ + struct sock *s = sk; + int addr_type = ipv6_addr_type(loc_addr); + + for(s = sk; s; s = s->next) { + if((s->num == num) && + !(s->dead && (s->state == TCP_CLOSE))) { + struct ipv6_pinfo *np = &s->net_pinfo.af_inet6; + + if (!ipv6_addr_any(&np->daddr) && + ipv6_addr_cmp(&np->daddr, rmt_addr)) + continue; + + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0) + return(s); + if ((addr_type & IPV6_ADDR_MULTICAST) && + inet6_mc_check(s, loc_addr)) + return (s); + continue; + } + return(s); + } + } + return NULL; +} + +/* This cleans up af_inet6 a bit. -DaveM */ +static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + __u32 v4addr = 0; + int addr_type; + + /* Check these errors. */ + if (sk->state != TCP_CLOSE || (addr_len < sizeof(struct sockaddr_in6))) + return -EINVAL; + + addr_type = ipv6_addr_type(&addr->sin6_addr); + + /* Check if the address belongs to the host. */ + if (addr_type == IPV6_ADDR_MAPPED) { + /* Raw sockets are IPv6 only */ + return(-EADDRNOTAVAIL); + } else { + if (addr_type != IPV6_ADDR_ANY) { + /* ipv4 addr of the socket is invalid. Only the + * unpecified and mapped address have a v4 equivalent. + */ + v4addr = LOOPBACK4_IPV6; + if (!(addr_type & IPV6_ADDR_MULTICAST)) { + if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL) + return(-EADDRNOTAVAIL); + } + } + } + + sk->rcv_saddr = v4addr; + sk->saddr = v4addr; + memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + if (!(addr_type & IPV6_ADDR_MULTICAST)) + memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + return 0; +} + +void rawv6_err(struct sock *sk, struct sk_buff *skb, struct ipv6hdr *hdr, + struct inet6_skb_parm *opt, + int type, int code, unsigned char *buff, u32 info) +{ + int err; + int harderr; + + if (buff > skb->tail) + return; + + /* Report error on raw socket, if: + 1. User requested recverr. + 2. 
Socket is connected (otherwise the error indication + is useless without recverr and error is hard.) + */ + if (!sk->net_pinfo.af_inet6.recverr && sk->state != TCP_ESTABLISHED) + return; + + harderr = icmpv6_err_convert(type, code, &err); + if (type == ICMPV6_PKT_TOOBIG) + harderr = (sk->net_pinfo.af_inet6.pmtudisc == IPV6_PMTUDISC_DO); + + if (sk->net_pinfo.af_inet6.recverr) + ipv6_icmp_error(sk, skb, err, 0, ntohl(info), buff); + + if (sk->net_pinfo.af_inet6.recverr || harderr) { + sk->err = err; + sk->error_report(sk); + } +} + +static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +{ + /* Charge it to the socket. */ + if (sock_queue_rcv_skb(sk,skb)<0) { + ipv6_statistics.Ip6InDiscards++; + kfree_skb(skb); + return 0; + } + + ipv6_statistics.Ip6InDelivers++; + return 0; +} + +/* + * This is next to useless... + * if we demultiplex in network layer we don't need the extra call + * just to queue the skb... + * maybe we could have the network decide upon a hint + * if it should call raw_rcv for demultiplexing + */ +int rawv6_rcv(struct sock *sk, struct sk_buff *skb, unsigned long len) +{ + if (sk->ip_hdrincl) + skb->h.raw = skb->nh.raw; + + rawv6_rcv_skb(sk, skb); + return 0; +} + + +/* + * This should be easy, if there is something there + * we return it, otherwise we block. + */ + +int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags, int *addr_len) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; + struct sk_buff *skb; + int copied, err; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + if (addr_len) + *addr_len=sizeof(*sin6); + + if (flags & MSG_ERRQUEUE) + return ipv6_recv_error(sk, msg, len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->tail - skb->h.raw; + if (copied > len) { + copied = len; + msg->msg_flags |= MSG_TRUNC; + } + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + sk->stamp=skb->stamp; + if (err) + goto out_free; + + /* Copy the address. */ + if (sin6) { + sin6->sin6_family = AF_INET6; + memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr, + sizeof(struct in6_addr)); + sin6->sin6_flowinfo = 0; + } + + if (sk->net_pinfo.af_inet6.rxopt.all) + datagram_recv_ctl(sk, msg, skb); + err = copied; + +out_free: + skb_free_datagram(sk, skb); +out: + return err; +} + +/* + * Sending...
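 */

Before the kernel-side send path, a hypothetical user-space sketch of the contract it implements: a raw socket opts into checksumming with IPV6_CHECKSUM, and rawv6_frag_cksum() below then stores the computed checksum at the requested payload offset (function name and values here are illustrative, and the socket is assumed to be an AF_INET6/SOCK_RAW ICMPv6 socket):

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>

int send_echo(int fd, const struct sockaddr_in6 *dst)
{
	struct icmp6_hdr echo;
	int offset = 2;		/* icmp6_cksum sits at byte 2 of the header */

	/* the SOL_RAW level is accepted by rawv6_setsockopt() above */
	setsockopt(fd, SOL_RAW, IPV6_CHECKSUM, &offset, sizeof offset);

	memset(&echo, 0, sizeof echo);
	echo.icmp6_type = ICMP6_ECHO_REQUEST;
	return sendto(fd, &echo, sizeof echo, 0,
		      (const struct sockaddr *) dst, sizeof *dst);
}

/*
 * The kernel-side send path: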
+ */ + +struct rawv6_fakehdr { + struct iovec *iov; + struct sock *sk; + __u32 len; + __u32 cksum; + __u32 proto; + struct in6_addr *daddr; +}; + +static int rawv6_getfrag(const void *data, struct in6_addr *saddr, + char *buff, unsigned int offset, unsigned int len) +{ + struct iovec *iov = (struct iovec *) data; + + return memcpy_fromiovecend(buff, iov, offset, len); +} + +static int rawv6_frag_cksum(const void *data, struct in6_addr *addr, + char *buff, unsigned int offset, + unsigned int len) +{ + struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data; + + if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset, + len, &hdr->cksum)) + return -EFAULT; + + if (offset == 0) { + struct sock *sk; + struct raw6_opt *opt; + struct in6_addr *daddr; + + sk = hdr->sk; + opt = &sk->tp_pinfo.tp_raw; + + if (hdr->daddr) + daddr = hdr->daddr; + else + daddr = addr + 1; + + hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len, + hdr->proto, hdr->cksum); + + if (opt->offset < len) { + __u16 *csum; + + csum = (__u16 *) (buff + opt->offset); + *csum = hdr->cksum; + } else { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp: cksum offset too big\n"); + return -EINVAL; + } + } + return 0; +} + + +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipv6_txoptions opt_space; + struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_txoptions *opt = NULL; + struct ip6_flowlabel *flowlabel = NULL; + struct flowi fl; + int addr_len = msg->msg_namelen; + struct in6_addr *daddr; + struct raw6_opt *raw_opt; + int hlimit = -1; + u16 proto; + int err; + + /* Rough check on arithmetic overflow, + better check is made in ip6_build_xmit + */ + if (len < 0) + return -EMSGSIZE; + + /* Mirror BSD error message compatibility */ + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT)) + return(-EINVAL); + /* + * Get and verify the address. + */ + + fl.fl6_flowlabel = 0; + + if (sin6) { + if (addr_len < sizeof(struct sockaddr_in6)) + return(-EINVAL); + + if (sin6->sin6_family && sin6->sin6_family != AF_INET6) + return(-EINVAL); + + /* port is the proto value [0..255] carried in nexthdr */ + proto = ntohs(sin6->sin6_port); + + if (!proto) + proto = sk->num; + + if (proto > 255) + return(-EINVAL); + + daddr = &sin6->sin6_addr; + if (np->sndflow) { + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + daddr = &flowlabel->dst; + } + } + + + /* Otherwise it will be difficult to maintain sk->dst_cache. */ + if (sk->state == TCP_ESTABLISHED && + !ipv6_addr_cmp(daddr, &sk->net_pinfo.af_inet6.daddr)) + daddr = &sk->net_pinfo.af_inet6.daddr; + } else { + if (sk->state != TCP_ESTABLISHED) + return(-EINVAL); + + proto = sk->num; + daddr = &(sk->net_pinfo.af_inet6.daddr); + fl.fl6_flowlabel = np->flow_label; + } + + if (ipv6_addr_any(daddr)) { + /* + * unspecfied destination address + * treated as error... is this correct ? 
+ */ + return(-EINVAL); + } + + fl.oif = sk->bound_dev_if; + fl.fl6_src = NULL; + + if (msg->msg_controllen) { + opt = &opt_space; + memset(opt, 0, sizeof(struct ipv6_txoptions)); + + err = datagram_send_ctl(msg, &fl, opt, &hlimit); + if (err < 0) { + fl6_sock_release(flowlabel); + return err; + } + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + } + if (!(opt->opt_nflen|opt->opt_flen)) + opt = NULL; + } + if (opt == NULL) + opt = np->opt; + if (flowlabel) + opt = fl6_merge_options(&opt_space, flowlabel, opt); + + raw_opt = &sk->tp_pinfo.tp_raw; + + fl.proto = proto; + fl.fl6_dst = daddr; + fl.uli_u.icmpt.type = 0; + fl.uli_u.icmpt.code = 0; + + if (raw_opt->checksum) { + struct rawv6_fakehdr hdr; + + hdr.iov = msg->msg_iov; + hdr.sk = sk; + hdr.len = len; + hdr.cksum = 0; + hdr.proto = proto; + + if (opt && opt->srcrt) + hdr.daddr = daddr; + else + hdr.daddr = NULL; + + err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len, + opt, hlimit, msg->msg_flags); + } else { + err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len, + opt, hlimit, msg->msg_flags); + } + + fl6_sock_release(flowlabel); + + return err<0?err:len; +} + +static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + switch (optname) { + case ICMPV6_FILTER: + if (optlen > sizeof(struct icmp6_filter)) + optlen = sizeof(struct icmp6_filter); + if (copy_from_user(&sk->tp_pinfo.tp_raw.filter, optval, optlen)) + return -EFAULT; + return 0; + default: + return -ENOPROTOOPT; + }; + + return 0; +} + +static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + int len; + + switch (optname) { + case ICMPV6_FILTER: + if (get_user(len, optlen)) + return -EFAULT; + if (len > sizeof(struct icmp6_filter)) + len = sizeof(struct icmp6_filter); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sk->tp_pinfo.tp_raw.filter, len)) + return -EFAULT; + return 0; + default: + return -ENOPROTOOPT; + }; + + return 0; +} + + +static int rawv6_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; + int val; + + switch(level) { + case SOL_RAW: + break; + + case SOL_ICMPV6: + if (sk->num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_seticmpfilter(sk, level, optname, optval, + optlen); + case SOL_IPV6: + if (optname == IPV6_CHECKSUM) + break; + default: + return ipv6_setsockopt(sk, level, optname, optval, + optlen); + }; + + if (get_user(val, (int *)optval)) + return -EFAULT; + + switch (optname) { + case IPV6_CHECKSUM: + if (val < 0) { + opt->checksum = 0; + } else { + opt->checksum = 1; + opt->offset = val; + } + + return 0; + break; + + default: + return(-ENOPROTOOPT); + } +} + +static int rawv6_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; + int val, len; + + switch(level) { + case SOL_RAW: + break; + + case SOL_ICMPV6: + if (sk->num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_geticmpfilter(sk, level, optname, optval, + optlen); + case SOL_IPV6: + if (optname == IPV6_CHECKSUM) + break; + default: + return ipv6_getsockopt(sk, level, optname, optval, + optlen); + }; + + if (get_user(len,optlen)) + return -EFAULT; + + switch (optname) { + case IPV6_CHECKSUM: + if (opt->checksum == 0) + val = -1; + else + val = opt->offset; + + default: + 
return -ENOPROTOOPT; + } + + len=min(sizeof(int),len); + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval,&val,len)) + return -EFAULT; + return 0; +} + + +static void rawv6_close(struct sock *sk, long timeout) +{ + /* See for explanation: raw_close in ipv4/raw.c */ + sk->state = TCP_CLOSE; + raw_v6_unhash(sk); + if (sk->num == IPPROTO_RAW) + ip6_ra_control(sk, -1, NULL); + sk->dead = 1; + destroy_sock(sk); +} + +static int rawv6_init_sk(struct sock *sk) +{ + return(0); +} + +struct proto rawv6_prot = { + (struct sock *)&rawv6_prot, /* sklist_next */ + (struct sock *)&rawv6_prot, /* sklist_prev */ + rawv6_close, /* close */ + udpv6_connect, /* connect */ + NULL, /* accept */ + NULL, /* retransmit */ + NULL, /* write_wakeup */ + NULL, /* read_wakeup */ + datagram_poll, /* poll */ + NULL, /* ioctl */ + rawv6_init_sk, /* init */ + inet6_destroy_sock, /* destroy */ + NULL, /* shutdown */ + rawv6_setsockopt, /* setsockopt */ + rawv6_getsockopt, /* getsockopt */ + rawv6_sendmsg, /* sendmsg */ + rawv6_recvmsg, /* recvmsg */ + rawv6_bind, /* bind */ + rawv6_rcv_skb, /* backlog_rcv */ + raw_v6_hash, /* hash */ + raw_v6_unhash, /* unhash */ + NULL, /* get_port */ + 128, /* max_header */ + 0, /* retransmits */ + "RAW", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ +}; diff --git a/pfinet/linux-src/net/ipv6/reassembly.c b/pfinet/linux-src/net/ipv6/reassembly.c new file mode 100644 index 00000000..3e1575dd --- /dev/null +++ b/pfinet/linux-src/net/ipv6/reassembly.c @@ -0,0 +1,492 @@ +/* + * IPv6 fragment reassembly + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: reassembly.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * Based on: net/ipv4/ip_fragment.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Fixes: + * Andi Kleen Make it work with multiple hosts. + * More RFC compliance. + */ +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/icmpv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> + +int sysctl_ip6frag_high_thresh = 256*1024; +int sysctl_ip6frag_low_thresh = 192*1024; +int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; + +atomic_t ip6_frag_mem = ATOMIC_INIT(0); + +struct ipv6_frag { + __u16 offset; + __u16 len; + struct sk_buff *skb; + + struct frag_hdr *fhdr; + + struct ipv6_frag *next; +}; + +/* + * Equivalent of ipv4 struct ipq + */ + +struct frag_queue { + + struct frag_queue *next; + struct frag_queue *prev; + + __u32 id; /* fragment id */ + struct in6_addr saddr; + struct in6_addr daddr; + struct timer_list timer; /* expire timer */ + struct ipv6_frag *fragments; + struct device *dev; + int iif; + __u8 last_in; /* has first/last segment arrived? */ +#define FIRST_IN 2 +#define LAST_IN 1 + __u8 nexthdr; + __u16 nhoffset; +}; + +static struct frag_queue ipv6_frag_queue = { + &ipv6_frag_queue, &ipv6_frag_queue, + 0, {{{0}}}, {{{0}}}, + {0}, NULL, NULL, + 0, 0, 0, 0 +}; + +/* Memory Tracking Functions. 
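+ *
+ * Every buffer and queue structure allocated for reassembly is
+ * accounted in the global ip6_frag_mem counter, which is how
+ * ipv6_reassembly() notices that sysctl_ip6frag_high_thresh has been
+ * crossed and calls frag_prune() to reclaim memory.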
*/ +extern __inline__ void frag_kfree_skb(struct sk_buff *skb) +{ + atomic_sub(skb->truesize, &ip6_frag_mem); + kfree_skb(skb); +} + +extern __inline__ void frag_kfree_s(void *ptr, int len) +{ + atomic_sub(len, &ip6_frag_mem); + kfree(ptr); +} + +extern __inline__ void *frag_kmalloc(int size, int pri) +{ + void *vp = kmalloc(size, pri); + + if(!vp) + return NULL; + atomic_add(size, &ip6_frag_mem); + return vp; +} + + +static void create_frag_entry(struct sk_buff *skb, + __u8 *nhptr, + struct frag_hdr *fhdr); +static u8 * reasm_frag(struct frag_queue *fq, + struct sk_buff **skb_in); + +static void reasm_queue(struct frag_queue *fq, + struct sk_buff *skb, + struct frag_hdr *fhdr, + u8 *nhptr); + +static void fq_free(struct frag_queue *fq); + +static void frag_prune(void) +{ + struct frag_queue *fq; + + while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) { + ipv6_statistics.Ip6ReasmFails++; + fq_free(fq); + if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) + return; + } + if (atomic_read(&ip6_frag_mem)) + printk(KERN_DEBUG "IPv6 frag_prune: memleak\n"); + atomic_set(&ip6_frag_mem, 0); +} + + +u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) +{ + struct sk_buff *skb = *skbp; + struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); + struct frag_queue *fq; + struct ipv6hdr *hdr; + + hdr = skb->nh.ipv6h; + + ipv6_statistics.Ip6ReasmReqds++; + + /* Jumbo payload inhibits frag. header */ + if (hdr->payload_len==0) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; + } + if ((u8 *)(fhdr+1) > skb->tail) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; + } + if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + frag_prune(); + + for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) { + if (fq->id == fhdr->identification && + !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) && + !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) { + + reasm_queue(fq, skb, fhdr, nhptr); + + if (fq->last_in == (FIRST_IN|LAST_IN)) + return reasm_frag(fq, skbp); + + return NULL; + } + } + + create_frag_entry(skb, nhptr, fhdr); + + return NULL; +} + + +static void fq_free(struct frag_queue *fq) +{ + struct ipv6_frag *fp, *back; + + del_timer(&fq->timer); + + for (fp = fq->fragments; fp; ) { + frag_kfree_skb(fp->skb); + back = fp; + fp=fp->next; + frag_kfree_s(back, sizeof(*back)); + } + + fq->prev->next = fq->next; + fq->next->prev = fq->prev; + + fq->prev = fq->next = NULL; + + frag_kfree_s(fq, sizeof(*fq)); +} + +static void frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct ipv6_frag *frag; + + fq = (struct frag_queue *) data; + + frag = fq->fragments; + + ipv6_statistics.Ip6ReasmTimeout++; + ipv6_statistics.Ip6ReasmFails++; + + if (frag == NULL) { + printk(KERN_DEBUG "invalid fragment queue\n"); + return; + } + + /* Send error only if the first segment arrived. + (fixed --ANK (980728)) + */ + if (fq->last_in&FIRST_IN) { + struct device *dev = dev_get_by_index(fq->iif); + + /* + But use as source device on which LAST ARRIVED + segment was received. And do not use fq->dev + pointer directly, device might already disappeared. 
+ */ + if (dev) { + frag->skb->dev = dev; + icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, + dev); + } + } + + fq_free(fq); +} + + +static void create_frag_entry(struct sk_buff *skb, + __u8 *nhptr, + struct frag_hdr *fhdr) +{ + struct frag_queue *fq; + struct ipv6hdr *hdr; + + fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue), + GFP_ATOMIC); + + if (fq == NULL) { + ipv6_statistics.Ip6ReasmFails++; + kfree_skb(skb); + return; + } + + memset(fq, 0, sizeof(struct frag_queue)); + + fq->id = fhdr->identification; + + hdr = skb->nh.ipv6h; + ipv6_addr_copy(&fq->saddr, &hdr->saddr); + ipv6_addr_copy(&fq->daddr, &hdr->daddr); + + /* init_timer has been done by the memset */ + fq->timer.function = frag_expire; + fq->timer.data = (long) fq; + fq->timer.expires = jiffies + sysctl_ip6frag_time; + + reasm_queue(fq, skb, fhdr, nhptr); + + if (fq->fragments) { + fq->prev = ipv6_frag_queue.prev; + fq->next = &ipv6_frag_queue; + fq->prev->next = fq; + ipv6_frag_queue.prev = fq; + + add_timer(&fq->timer); + } else + frag_kfree_s(fq, sizeof(*fq)); +} + + +/* + * We queue the packet even if it's the last. + * It's a trade off. This allows the reassembly + * code to be simpler (=faster) and of the + * steps we do for queueing the only unnecessary + * one it's the kmalloc for a struct ipv6_frag. + * Feel free to try other alternatives... + */ + +static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, u8 *nhptr) +{ + struct ipv6_frag *nfp, *fp, **bptr; + + nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag), + GFP_ATOMIC); + + if (nfp == NULL) { + kfree_skb(skb); + return; + } + + nfp->offset = ntohs(fhdr->frag_off) & ~0x7; + nfp->len = (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + + if ((u32)nfp->offset + (u32)nfp->len >= 65536) { + icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); + goto err; + } + if (fhdr->frag_off & __constant_htons(0x0001)) { + /* Check if the fragment is rounded to 8 bytes. + * Required by the RFC. + * ... and would break our defragmentation algorithm 8) + */ + if (nfp->len & 0x7) { + printk(KERN_DEBUG "fragment not rounded to 8bytes\n"); + + /* + It is not in specs, but I see no reasons + to send an error in this case. --ANK + */ + if (nfp->offset == 0) + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + &skb->nh.ipv6h->payload_len); + goto err; + } + } + + nfp->skb = skb; + nfp->fhdr = fhdr; + nfp->next = NULL; + + bptr = &fq->fragments; + + for (fp = fq->fragments; fp; fp=fp->next) { + if (nfp->offset <= fp->offset) + break; + bptr = &fp->next; + } + if (fp && fp->offset == nfp->offset) { + if (nfp->len != fp->len) { + printk(KERN_DEBUG "reasm_queue: dup with wrong len\n"); + } + + /* duplicate. discard it. */ + goto err; + } + + atomic_add(skb->truesize, &ip6_frag_mem); + + /* All the checks are done, fragment is acepted. + Only now we are allowed to update reassembly data! + (fixed --ANK (980728)) + */ + + /* iif always set to one of the last arrived segment */ + fq->dev = skb->dev; + fq->iif = skb->dev->ifindex; + + /* Last fragment */ + if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) + fq->last_in |= LAST_IN; + + /* First fragment. + nexthdr and nhptr are get from the first fragment. + Moreover, nexthdr is UNDEFINED for all the fragments but the + first one. 
+ (fixed --ANK (980728)) + */ + if (nfp->offset == 0) { + fq->nexthdr = fhdr->nexthdr; + fq->last_in |= FIRST_IN; + fq->nhoffset = nhptr - skb->nh.raw; + } + + *bptr = nfp; + nfp->next = fp; + return; + +err: + frag_kfree_s(nfp, sizeof(*nfp)); + kfree_skb(skb); +} + +/* + * check if this fragment completes the packet + * returns true on success + */ +static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in) +{ + struct ipv6_frag *fp; + struct ipv6_frag *head = fq->fragments; + struct ipv6_frag *tail = NULL; + struct sk_buff *skb; + __u32 offset = 0; + __u32 payload_len; + __u16 unfrag_len; + __u16 copy; + u8 *nhptr; + + for(fp = head; fp; fp=fp->next) { + if (offset != fp->offset) + return NULL; + + offset += fp->len; + tail = fp; + } + + /* + * we know the m_flag arrived and we have a queue, + * starting from 0, without gaps. + * this means we have all fragments. + */ + + /* Unfragmented part is taken from the first segment. + (fixed --ANK (980728)) + */ + unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1); + + payload_len = (unfrag_len + tail->offset + + (tail->skb->tail - (__u8 *) (tail->fhdr + 1))); + + if (payload_len > 65535) { + if (net_ratelimit()) + printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len); + ipv6_statistics.Ip6ReasmFails++; + fq_free(fq); + return NULL; + } + + if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) { + if (net_ratelimit()) + printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n"); + ipv6_statistics.Ip6ReasmFails++; + fq_free(fq); + return NULL; + } + + copy = unfrag_len + sizeof(struct ipv6hdr); + + skb->nh.ipv6h = (struct ipv6hdr *) skb->data; + skb->dev = fq->dev; + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->pkt_type = head->skb->pkt_type; + memcpy(skb->cb, head->skb->cb, sizeof(skb->cb)); + skb->dst = dst_clone(head->skb->dst); + + memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy); + nhptr = skb->nh.raw + fq->nhoffset; + *nhptr = fq->nexthdr; + + skb->h.raw = skb->tail; + + skb->nh.ipv6h->payload_len = ntohs(payload_len); + + *skb_in = skb; + + /* + * FIXME: If we don't have a checksum we ought to be able + * to defragment and checksum in this pass. [AC] + * Note that we don't really know yet whether the protocol + * needs checksums at all. It might still be a good idea. -AK + */ + for(fp = fq->fragments; fp; ) { + struct ipv6_frag *back; + + memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len); + frag_kfree_skb(fp->skb); + back = fp; + fp=fp->next; + frag_kfree_s(back, sizeof(*back)); + } + + del_timer(&fq->timer); + fq->prev->next = fq->next; + fq->next->prev = fq->prev; + fq->prev = fq->next = NULL; + + frag_kfree_s(fq, sizeof(*fq)); + + ipv6_statistics.Ip6ReasmOKs++; + return nhptr; +} diff --git a/pfinet/linux-src/net/ipv6/route_ipv6.c b/pfinet/linux-src/net/ipv6/route_ipv6.c new file mode 100644 index 00000000..c34a3b5d --- /dev/null +++ b/pfinet/linux-src/net/ipv6/route_ipv6.c @@ -0,0 +1,1972 @@ +/* + * Linux INET6 implementation + * FIB front-end. + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: route_ipv6.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/route.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/init.h> +#include <linux/netlink.h> +#include <linux/if_arp.h> + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +#endif + +#include <net/snmp.h> +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#include <net/ndisc.h> +#include <net/addrconf.h> +#include <net/tcp.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <asm/uaccess.h> + +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#undef CONFIG_RT6_POLICY + +/* Set to 3 to get tracing. */ +#define RT6_DEBUG 2 + +#if RT6_DEBUG >= 3 +#define RDBG(x) printk x +#define RT6_TRACE(x...) printk(KERN_DEBUG x) +#else +#define RDBG(x) +#define RT6_TRACE(x...) do { ; } while (0) +#endif + +#if RT6_DEBUG >= 1 +#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } }) +#else +#define BUG_TRAP(x) do { ; } while (0) +#endif + + +int ip6_rt_max_size = 4096; +int ip6_rt_gc_min_interval = 5*HZ; +int ip6_rt_gc_timeout = 60*HZ; +int ip6_rt_gc_interval = 30*HZ; +int ip6_rt_gc_elasticity = 9; +int ip6_rt_mtu_expires = 10*60*HZ; + +static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); +static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); +static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, + struct sk_buff *skb); +static struct dst_entry *ip6_negative_advice(struct dst_entry *); +static int ip6_dst_gc(void); + +static int ip6_pkt_discard(struct sk_buff *skb); +static void ip6_link_failure(struct sk_buff *skb); + +struct dst_ops ip6_dst_ops = { + AF_INET6, + __constant_htons(ETH_P_IPV6), + 1024, + + ip6_dst_gc, + ip6_dst_check, + ip6_dst_reroute, + NULL, + ip6_negative_advice, + ip6_link_failure, +}; + +struct rt6_info ip6_null_entry = { + {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), &loopback_dev, + -1, 0, 0, 0, 0, 0, 0, 0, 0, + -ENETUNREACH, NULL, NULL, + ip6_pkt_discard, ip6_pkt_discard, +#ifdef CONFIG_NET_CLS_ROUTE + 0, +#endif + &ip6_dst_ops}}, + NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U, + 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0} +}; + +struct fib6_node ip6_routing_table = { + NULL, NULL, NULL, NULL, + &ip6_null_entry, + 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0 +}; + +#ifdef CONFIG_RT6_POLICY +int ip6_rt_policy = 0; + +struct pol_chain *rt6_pol_list = NULL; + + +static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb); +static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk); + +static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr, + struct fl_acc_args *args); + +#else +#define ip6_rt_policy (0) +#endif + +/* + * Route lookup + */ + +static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, + int oif, + int strict) +{ + struct rt6_info *local = NULL; + struct rt6_info *sprt; + + if (oif) { + for (sprt = rt; sprt; sprt = sprt->u.next) { + struct device *dev = sprt->rt6i_dev; + if (dev->ifindex == oif) + return sprt; + if (dev->flags&IFF_LOOPBACK) + local = sprt; + } + + if (local) + return local; + + if (strict) + return &ip6_null_entry; + } + return rt; +} + +/* + * pointer to the last default router chosen + */ +static struct rt6_info *rt6_dflt_pointer = NULL; + +static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, 
int oif) +{ + struct rt6_info *match = NULL; + struct rt6_info *sprt; + int mpri = 0; + + for (sprt = rt; sprt; sprt = sprt->u.next) { + struct neighbour *neigh; + + RDBG(("sprt(%p): ", sprt)); + if ((neigh = sprt->rt6i_nexthop)) { + int m = -1; + + RDBG(("nxthop(%p,%d) ", neigh, neigh->nud_state)); + switch (neigh->nud_state) { + case NUD_REACHABLE: + RDBG(("NUD_REACHABLE ")); + if (sprt != rt6_dflt_pointer) { + rt = sprt; + RDBG(("sprt!=dflt_ptr -> %p\n", + sprt)); + goto out; + } + RDBG(("m=2, ")); + m = 2; + break; + + case NUD_DELAY: + RDBG(("NUD_DELAY, m=1, ")); + m = 1; + break; + + case NUD_STALE: + RDBG(("NUD_STALE, m=1, ")); + m = 1; + break; + }; + + if (oif && sprt->rt6i_dev->ifindex == oif) { + m += 2; + } + + if (m >= mpri) { + RDBG(("m>=mpri setmatch, ")); + mpri = m; + match = sprt; + } + } + } + + if (match) { + RDBG(("match, set rt, ")); + rt = match; + } else { + /* + * No default routers are known to be reachable. + * SHOULD round robin + */ + RDBG(("!match, trying rt6_dflt_pointer, ")); + if (rt6_dflt_pointer) { + struct rt6_info *next; + + if ((next = rt6_dflt_pointer->u.next) && + next->u.dst.error == 0) + rt = next; + } + } + +out: + rt6_dflt_pointer = rt; + RDBG(("returning %p, dflt_ptr set\n", rt)); + return rt; +} + +struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, + int oif, int strict) +{ + struct fib6_node *fn; + struct rt6_info *rt; + + start_bh_atomic(); + fn = fib6_lookup(&ip6_routing_table, daddr, saddr); + rt = rt6_device_match(fn->leaf, oif, strict); + atomic_inc(&rt->u.dst.use); + atomic_inc(&rt->u.dst.refcnt); + end_bh_atomic(); + + rt->u.dst.lastuse = jiffies; + if (rt->u.dst.error == 0) + return rt; + dst_release(&rt->u.dst); + return NULL; +} + +static int rt6_ins(struct rt6_info *rt) +{ + int err; + + start_bh_atomic(); + err = fib6_add(&ip6_routing_table, rt); + end_bh_atomic(); + + return err; +} + +static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, + struct in6_addr *saddr) +{ + int err; + struct rt6_info *rt; + + /* + * Clone the route. 
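+	 *
+	 *	The copy becomes a host route (prefix length 128) flagged
+	 *	RTF_CACHE and is inserted into the tree, so later lookups
+	 *	for this destination hit the cached clone instead of the
+	 *	covering prefix route.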
+ */ + + rt = ip6_rt_copy(ort); + + if (rt) { + ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); + + if (!(rt->rt6i_flags&RTF_GATEWAY)) + ipv6_addr_copy(&rt->rt6i_gateway, daddr); + + rt->rt6i_dst.plen = 128; + rt->rt6i_flags |= RTF_CACHE; + +#ifdef CONFIG_IPV6_SUBTREES + if (rt->rt6i_src.plen && saddr) { + ipv6_addr_copy(&rt->rt6i_src.addr, saddr); + rt->rt6i_src.plen = 128; + } +#endif + + rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + + dst_clone(&rt->u.dst); + err = rt6_ins(rt); + if (err == 0) + return rt; + rt->u.dst.error = err; + return rt; + } + dst_clone(&ip6_null_entry.u.dst); + return &ip6_null_entry; +} + +#ifdef CONFIG_RT6_POLICY +static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt, + struct sk_buff *skb) +{ + struct in6_addr *daddr, *saddr; + struct fl_acc_args arg; + + arg.type = FL_ARG_FORWARD; + arg.fl_u.skb = skb; + + saddr = &skb->nh.ipv6h->saddr; + daddr = &skb->nh.ipv6h->daddr; + + return rt6_flow_lookup(rt, daddr, saddr, &arg); +} + +static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt, + struct sock *sk, + struct flowi *fl) +{ + struct fl_acc_args arg; + + arg.type = FL_ARG_ORIGIN; + arg.fl_u.fl_o.sk = sk; + arg.fl_u.fl_o.flow = fl; + + return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr, + &arg); +} + +#endif + +#define BACKTRACK() \ +if (rt == &ip6_null_entry && strict) { \ + while ((fn = fn->parent) != NULL) { \ + if (fn->fn_flags & RTN_ROOT) { \ + dst_clone(&rt->u.dst); \ + goto out; \ + } \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ + } \ +} + + +void ip6_route_input(struct sk_buff *skb) +{ + struct fib6_node *fn; + struct rt6_info *rt; + int strict; + + strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); + + fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr); + +restart: + rt = fn->leaf; + + if ((rt->rt6i_flags & RTF_CACHE)) { + if (ip6_rt_policy == 0) { + rt = rt6_device_match(rt, skb->dev->ifindex, strict); + BACKTRACK(); + dst_clone(&rt->u.dst); + goto out; + } + +#ifdef CONFIG_RT6_POLICY + if ((rt->rt6i_flags & RTF_FLOW)) { + struct rt6_info *sprt; + + for (sprt = rt; sprt; sprt = sprt->u.next) { + if (rt6_flow_match_in(sprt, skb)) { + rt = sprt; + dst_clone(&rt->u.dst); + goto out; + } + } + } +#endif + } + + rt = rt6_device_match(rt, skb->dev->ifindex, 0); + BACKTRACK(); + + if (ip6_rt_policy == 0) { + if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { + rt = rt6_cow(rt, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr); + goto out; + } + dst_clone(&rt->u.dst); + } else { +#ifdef CONFIG_RT6_POLICY + rt = rt6_flow_lookup_in(rt, skb); +#else + /* NEVER REACHED */ +#endif + } + +out: + rt->u.dst.lastuse = jiffies; + atomic_inc(&rt->u.dst.refcnt); + skb->dst = (struct dst_entry *) rt; +} + +struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +{ + struct fib6_node *fn; + struct rt6_info *rt; + int strict; + + strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); + + start_bh_atomic(); + fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr, + fl->nl_u.ip6_u.saddr); + +restart: + rt = fn->leaf; + + if ((rt->rt6i_flags & RTF_CACHE)) { + if (ip6_rt_policy == 0) { + rt = rt6_device_match(rt, fl->oif, strict); + BACKTRACK(); + dst_clone(&rt->u.dst); + goto out; + } + +#ifdef CONFIG_RT6_POLICY + if ((rt->rt6i_flags & RTF_FLOW)) { + struct rt6_info *sprt; + + for (sprt = rt; sprt; sprt = sprt->u.next) { + if (rt6_flow_match_out(sprt, 
sk)) { + rt = sprt; + dst_clone(&rt->u.dst); + goto out; + } + } + } +#endif + } + if (rt->rt6i_flags & RTF_DEFAULT) { + if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) + rt = rt6_best_dflt(rt, fl->oif); + } else { + rt = rt6_device_match(rt, fl->oif, strict); + BACKTRACK(); + } + + if (ip6_rt_policy == 0) { + if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { + rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr, + fl->nl_u.ip6_u.saddr); + goto out; + } + dst_clone(&rt->u.dst); + } else { +#ifdef CONFIG_RT6_POLICY + rt = rt6_flow_lookup_out(rt, sk, fl); +#else + /* NEVER REACHED */ +#endif + } + +out: + rt->u.dst.lastuse = jiffies; + atomic_inc(&rt->u.dst.refcnt); + end_bh_atomic(); + return &rt->u.dst; +} + + +/* + * Destination cache support functions + */ + +static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) +{ + struct rt6_info *rt; + + rt = (struct rt6_info *) dst; + + if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + return dst; + + dst_release(dst); + return NULL; +} + +static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb) +{ + /* + * FIXME + */ + RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb, + __builtin_return_address(0))); + return NULL; +} + +static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) +{ + struct rt6_info *rt = (struct rt6_info *) dst; + + if (rt) { + if (rt->rt6i_flags & RTF_CACHE) + ip6_del_rt(rt); + dst_release(dst); + } + return NULL; +} + +static void ip6_link_failure(struct sk_buff *skb) +{ + struct rt6_info *rt; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); + + rt = (struct rt6_info *) skb->dst; + if (rt) { + if (rt->rt6i_flags&RTF_CACHE) { + dst_set_expires(&rt->u.dst, 0); + rt->rt6i_flags |= RTF_EXPIRES; + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + rt->rt6i_node->fn_sernum = -1; + } +} + +static int ip6_dst_gc() +{ + static unsigned expire = 30*HZ; + static unsigned long last_gc; + unsigned long now = jiffies; + + start_bh_atomic(); + if ((long)(now - last_gc) < ip6_rt_gc_min_interval) + goto out; + + expire++; + fib6_run_gc(expire); + last_gc = now; + if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) + expire = ip6_rt_gc_timeout>>1; + +out: + expire -= expire>>ip6_rt_gc_elasticity; + end_bh_atomic(); + return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); +} + +/* Clean host part of a prefix. Not necessary in radix tree, + but results in cleaner routing tables. + + Remove it only when all the things will work! 
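+
+   Worked example: for plen = 10, o = (10+7)>>3 = 2, so bytes 2..15
+   are zeroed, and b = 10&7 = 2, so byte plen>>3 = 1 is masked with
+   0xFF<<6 = 0xC0, keeping only the two prefix bits it contributes.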
+ */ + +static void ipv6_wash_prefix(struct in6_addr *pfx, int plen) +{ + int b = plen&0x7; + int o = (plen + 7)>>3; + + if (o < 16) + memset(pfx->s6_addr + o, 0, 16 - o); + if (b != 0) + pfx->s6_addr[plen>>3] &= (0xFF<<(8-b)); +} + +static int ipv6_get_mtu(struct device *dev) +{ + struct inet6_dev *idev; + + idev = ipv6_get_idev(dev); + if (idev) + return idev->cnf.mtu6; + else + return IPV6_MIN_MTU; +} + +static int ipv6_get_hoplimit(struct device *dev) +{ + struct inet6_dev *idev; + + idev = ipv6_get_idev(dev); + if (idev) + return idev->cnf.hop_limit; + else + return ipv6_devconf.hop_limit; +} + +/* + * + */ + +int ip6_route_add(struct in6_rtmsg *rtmsg) +{ + int err; + struct rt6_info *rt; + struct device *dev = NULL; + int addr_type; + + if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) + return -EINVAL; +#ifndef CONFIG_IPV6_SUBTREES + if (rtmsg->rtmsg_src_len) + return -EINVAL; +#endif + if (rtmsg->rtmsg_metric == 0) + rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + + rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + + if (rt == NULL) + return -ENOMEM; + + rt->u.dst.obsolete = -1; + rt->rt6i_expires = rtmsg->rtmsg_info; + + addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); + + if (addr_type & IPV6_ADDR_MULTICAST) + rt->u.dst.input = ip6_mc_input; + else + rt->u.dst.input = ip6_forward; + + rt->u.dst.output = ip6_output; + + if (rtmsg->rtmsg_ifindex) { + dev = dev_get_by_index(rtmsg->rtmsg_ifindex); + err = -ENODEV; + if (dev == NULL) + goto out; + } + + ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst); + rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; + ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src); + rt->rt6i_src.plen = rtmsg->rtmsg_src_len; + ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen); +#endif + + rt->rt6i_metric = rtmsg->rtmsg_metric; + + /* We cannot add true routes via loopback here, + they would result in kernel looping; promote them to reject routes + */ + if ((rtmsg->rtmsg_flags&RTF_REJECT) || + (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { + dev = &loopback_dev; + rt->u.dst.output = ip6_pkt_discard; + rt->u.dst.input = ip6_pkt_discard; + rt->u.dst.error = -ENETUNREACH; + rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; + goto install_route; + } + + if (rtmsg->rtmsg_flags & RTF_GATEWAY) { + struct in6_addr *gw_addr; + int gwa_type; + + gw_addr = &rtmsg->rtmsg_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); + gwa_type = ipv6_addr_type(gw_addr); + + if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { + struct rt6_info *grt; + + /* IPv6 strictly inhibits using not link-local + addresses as nexthop address. + Otherwise, router will not able to send redirects. + It is very good, but in some (rare!) curcumstances + (SIT, PtP, NBMA NOARP links) it is handy to allow + some exceptions. 
--ANK + */ + err = -EINVAL; + if (!(gwa_type&IPV6_ADDR_UNICAST)) + goto out; + + grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); + + err = -EHOSTUNREACH; + if (grt == NULL) + goto out; + if (!(grt->rt6i_flags&RTF_GATEWAY)) + err = 0; + dev = grt->rt6i_dev; + dst_release(&grt->u.dst); + + if (err) + goto out; + } + err = -EINVAL; + if (dev == NULL || (dev->flags&IFF_LOOPBACK)) + goto out; + } + + err = -ENODEV; + if (dev == NULL) + goto out; + + if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { + rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway); + err = -ENOMEM; + if (rt->rt6i_nexthop == NULL) + goto out; + } + + if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) + rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS; + else + rt->rt6i_hoplimit = ipv6_get_hoplimit(dev); + rt->rt6i_flags = rtmsg->rtmsg_flags; + +install_route: + rt->u.dst.pmtu = ipv6_get_mtu(dev); + rt->u.dst.rtt = TCP_TIMEOUT_INIT; + rt->rt6i_dev = dev; + return rt6_ins(rt); + +out: + dst_free((struct dst_entry *) rt); + return err; +} + +int ip6_del_rt(struct rt6_info *rt) +{ + int err; + + start_bh_atomic(); + rt6_dflt_pointer = NULL; + err = fib6_del(rt); + end_bh_atomic(); + + return err; +} + +int ip6_route_del(struct in6_rtmsg *rtmsg) +{ + struct fib6_node *fn; + struct rt6_info *rt; + int err = -ESRCH; + + start_bh_atomic(); + + fn = fib6_locate(&ip6_routing_table, + &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, + &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); + + if (fn) { + for (rt = fn->leaf; rt; rt = rt->u.next) { + if (rtmsg->rtmsg_ifindex && + (rt->rt6i_dev == NULL || + rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) + continue; + if (rtmsg->rtmsg_flags&RTF_GATEWAY && + ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) + continue; + if (rtmsg->rtmsg_metric && + rtmsg->rtmsg_metric != rt->rt6i_metric) + continue; + err = ip6_del_rt(rt); + break; + } + } + end_bh_atomic(); + + return err; +} + +#ifdef CONFIG_IPV6_NETLINK +/* + * NETLINK interface + * routing socket moral equivalent + */ + +static int rt6_msgrcv(int unit, struct sk_buff *skb) +{ + int count = 0; + struct in6_rtmsg *rtmsg; + int err; + + rtnl_lock(); + while (skb->len) { + if (skb->len < sizeof(struct in6_rtmsg)) { + count = -EINVAL; + goto out; + } + + rtmsg = (struct in6_rtmsg *) skb->data; + skb_pull(skb, sizeof(struct in6_rtmsg)); + count += sizeof(struct in6_rtmsg); + + switch (rtmsg->rtmsg_type) { + case RTMSG_NEWROUTE: + err = ip6_route_add(rtmsg); + break; + case RTMSG_DELROUTE: + err = ip6_route_del(rtmsg); + break; + default: + count = -EINVAL; + goto out; + }; + } + +out: + rtnl_unlock(); + kfree_skb(skb); + return count; +} + +static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg) +{ + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC); + if (skb == NULL) + return; + + memcpy(skb_put(skb, sizeof(struct in6_rtmsg)), &rtmsg, + sizeof(struct in6_rtmsg)); + + if (netlink_post(NETLINK_ROUTE6, skb)) + kfree_skb(skb); +} + +void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src, + struct in6_addr *gw, struct device *dev, + int dstlen, int srclen, int metric, __u32 flags) +{ + struct sk_buff *skb; + struct in6_rtmsg *msg; + + skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC); + if (skb == NULL) + return; + + msg = (struct in6_rtmsg *) skb_put(skb, sizeof(struct in6_rtmsg)); + + memset(msg, 0, sizeof(struct in6_rtmsg)); + + msg->rtmsg_type = type; + + if (dst) + ipv6_addr_copy(&msg->rtmsg_dst, dst); + + if (src) { + ipv6_addr_copy(&msg->rtmsg_src, src); + msg->rtmsg_src_len = srclen; + } + + if 
(gw) + ipv6_addr_copy(&msg->rtmsg_gateway, gw); + + msg->rtmsg_dst_len = dstlen; + msg->rtmsg_metric = metric; + + if (dev) + msg->rtmsg_ifindex = dev->ifindex; + + msg->rtmsg_flags = flags; + + if (netlink_post(NETLINK_ROUTE6, skb)) + kfree_skb(skb); +} +#endif /* CONFIG_IPV6_NETLINK */ + +/* + * Handle redirects + */ +void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, + struct neighbour *neigh, int on_link) +{ + struct rt6_info *rt, *nrt; + + /* Locate old route to this destination. */ + rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); + + if (rt == NULL) + return; + + if (neigh->dev != rt->rt6i_dev) + goto out; + + /* Redirect received -> path was valid. + Look, redirects are sent only in response to data packets, + so that this nexthop apparently is reachable. --ANK + */ + dst_confirm(&rt->u.dst); + + /* Duplicate redirect: silently ignore. */ + if (neigh == rt->u.dst.neighbour) + goto out; + + /* Current route is on-link; redirect is always invalid. + + Seems, previous statement is not true. It could + be node, which looks for us as on-link (f.e. proxy ndisc) + But then router serving it might decide, that we should + know truth 8)8) --ANK (980726). + */ + if (!(rt->rt6i_flags&RTF_GATEWAY)) + goto out; + +#if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB) + /* + * During transition gateways have more than + * one link local address. Certainly, it is violation + * of basic principles, but it is temparary. + */ + /* + * RFC 1970 specifies that redirects should only be + * accepted if they come from the nexthop to the target. + * Due to the way default routers are chosen, this notion + * is a bit fuzzy and one might need to check all default + * routers. + */ + + if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) { + if (rt->rt6i_flags & RTF_DEFAULT) { + struct rt6_info *rt1; + + for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { + if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) { + dst_clone(&rt1->u.dst); + dst_release(&rt->u.dst); + rt = rt1; + goto source_ok; + } + } + } + if (net_ratelimit()) + printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " + "for redirect target\n"); + goto out; + } + +source_ok: +#endif + + /* + * We have finally decided to accept it. + */ + + nrt = ip6_rt_copy(rt); + if (nrt == NULL) + goto out; + + nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; + if (on_link) + nrt->rt6i_flags &= ~RTF_GATEWAY; + + ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); + nrt->rt6i_dst.plen = 128; + + ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); + nrt->rt6i_nexthop = neigh_clone(neigh); + /* Reset pmtu, it may be better */ + nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev); + nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev); + + if (rt6_ins(nrt)) + goto out; + + /* Sic! rt6_redirect is called by bh, so that it is allowed */ + dst_release(&rt->u.dst); + if (rt->rt6i_flags&RTF_CACHE) + ip6_del_rt(rt); + return; + +out: + dst_release(&rt->u.dst); + return; +} + +/* + * Handle ICMP "packet too big" messages + * i.e. Path MTU discovery + */ + +void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, + struct device *dev, u32 pmtu) +{ + struct rt6_info *rt, *nrt; + + if (pmtu < IPV6_MIN_MTU) { + if (net_ratelimit()) + printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n", + pmtu); + return; + } + + rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); + + if (rt == NULL) + return; + + if (pmtu >= rt->u.dst.pmtu) + goto out; + + /* New mtu received -> path was valid. 
+ They are sent only in response to data packets, + so that this nexthop apparently is reachable. --ANK + */ + dst_confirm(&rt->u.dst); + + /* Host route. If it is static, it would be better + not to override it, but add new one, so that + when cache entry will expire old pmtu + would return automatically. + */ + if (rt->rt6i_flags & RTF_CACHE) { + rt->u.dst.pmtu = pmtu; + dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; + goto out; + } + + /* Network route. + Two cases are possible: + 1. It is connected route. Action: COW + 2. It is gatewayed route or NONEXTHOP route. Action: clone it. + */ + if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { + nrt = rt6_cow(rt, daddr, saddr); + if (!nrt->u.dst.error) { + nrt->u.dst.pmtu = pmtu; + dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; + dst_release(&nrt->u.dst); + } + } else { + nrt = ip6_rt_copy(rt); + if (nrt == NULL) + goto out; + ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr); + nrt->rt6i_dst.plen = 128; + nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop); + dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; + nrt->u.dst.pmtu = pmtu; + rt6_ins(nrt); + } + +out: + dst_release(&rt->u.dst); +} + +/* + * Misc support functions + */ + +static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) +{ + struct rt6_info *rt; + + rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + + if (rt) { + rt->u.dst.input = ort->u.dst.input; + rt->u.dst.output = ort->u.dst.output; + + rt->u.dst.pmtu = ort->u.dst.pmtu; + rt->u.dst.rtt = ort->u.dst.rtt; + rt->u.dst.window = ort->u.dst.window; + rt->u.dst.mxlock = ort->u.dst.mxlock; + rt->u.dst.dev = ort->u.dst.dev; + rt->u.dst.lastuse = jiffies; + rt->rt6i_hoplimit = ort->rt6i_hoplimit; + rt->rt6i_expires = 0; + + ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); + rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_metric = 0; + + memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); +#ifdef CONFIG_IPV6_SUBTREES + memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); +#endif + } + return rt; +} + +struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev) +{ + struct rt6_info *rt; + struct fib6_node *fn; + + fn = &ip6_routing_table; + + start_bh_atomic(); + for (rt = fn->leaf; rt; rt=rt->u.next) { + if (dev == rt->rt6i_dev && + ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0) + break; + } + if (rt) + dst_clone(&rt->u.dst); + end_bh_atomic(); + return rt; +} + +struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, + struct device *dev) +{ + struct in6_rtmsg rtmsg; + + memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); + rtmsg.rtmsg_metric = 1024; + rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP; + + rtmsg.rtmsg_ifindex = dev->ifindex; + + ip6_route_add(&rtmsg); + return rt6_get_dflt_router(gwaddr, dev); +} + +void rt6_purge_dflt_routers(int last_resort) +{ + struct rt6_info *rt; + u32 flags; + + if (last_resort) + flags = RTF_ALLONLINK; + else + flags = RTF_DEFAULT | RTF_ADDRCONF; + +restart: + rt6_dflt_pointer = NULL; + + for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { + if (rt->rt6i_flags & flags) { + ip6_del_rt(rt); + goto restart; + } + } +} + +int ipv6_route_ioctl(unsigned int cmd, void *arg) +{ + struct in6_rtmsg rtmsg; + int err; + + RDBG(("ipv6_route_ioctl(%d,%p)\n", cmd, arg)); + 
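+	/*
+	 * Rough user-space sketch of this interface (the device name
+	 * and prefix are examples only):
+	 *
+	 *	struct in6_rtmsg rt;
+	 *
+	 *	memset(&rt, 0, sizeof(rt));
+	 *	inet_pton(AF_INET6, "fec0::", &rt.rtmsg_dst);
+	 *	rt.rtmsg_dst_len = 10;
+	 *	rt.rtmsg_flags = RTF_UP;
+	 *	rt.rtmsg_ifindex = if_nametoindex("eth0");
+	 *	ioctl(fd, SIOCADDRT, &rt);	(fd is an AF_INET6 socket)
+	 */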
switch(cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = copy_from_user(&rtmsg, arg, + sizeof(struct in6_rtmsg)); + if (err) + return -EFAULT; + + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: + err = ip6_route_add(&rtmsg); + break; + case SIOCDELRT: + err = ip6_route_del(&rtmsg); + break; + default: + err = -EINVAL; + }; + rtnl_unlock(); + +#ifdef CONFIG_IPV6_NETLINK + if (err == 0) + rt6_sndrtmsg(&rtmsg); +#endif + return err; + }; + + return -EINVAL; +} + +/* + * Drop the packet on the floor + */ + +int ip6_pkt_discard(struct sk_buff *skb) +{ + ipv6_statistics.Ip6OutNoRoutes++; + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); + kfree_skb(skb); + return 0; +} + +/* + * Add address + */ + +int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev) +{ + struct rt6_info *rt; + + rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + if (rt == NULL) + return -ENOMEM; + + rt->u.dst.input = ip6_input; + rt->u.dst.output = ip6_output; + rt->rt6i_dev = dev_get("lo"); + rt->u.dst.rtt = TCP_TIMEOUT_INIT; + rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev); + rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev); + rt->u.dst.obsolete = -1; + + rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; + rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (rt->rt6i_nexthop == NULL) { + dst_free((struct dst_entry *) rt); + return -ENOMEM; + } + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt6_ins(rt); + + return 0; +} + +/* Delete address. Warning: you should check that this address + disappeared before calling this function. + */ + +int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev) +{ + struct rt6_info *rt; + int err = -ENOENT; + + rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1); + if (rt) { + if (rt->rt6i_dst.plen == 128) + err= ip6_del_rt(rt); + dst_release(&rt->u.dst); + } + + return err; +} + +#ifdef CONFIG_RT6_POLICY + +static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb) +{ + struct flow_filter *frule; + struct pkt_filter *filter; + int res = 1; + + if ((frule = rt->rt6i_filter) == NULL) + goto out; + + if (frule->type != FLR_INPUT) { + res = 0; + goto out; + } + + for (filter = frule->u.filter; filter; filter = filter->next) { + __u32 *word; + + word = (__u32 *) skb->h.raw; + word += filter->offset; + + if ((*word ^ filter->value) & filter->mask) { + res = 0; + break; + } + } + +out: + return res; +} + +static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk) +{ + struct flow_filter *frule; + int res = 1; + + if ((frule = rt->rt6i_filter) == NULL) + goto out; + + if (frule->type != FLR_INPUT) { + res = 0; + goto out; + } + + if (frule->u.sk != sk) + res = 0; +out: + return res; +} + +static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr, + struct fl_acc_args *args) +{ + struct flow_rule *frule; + struct rt6_info *nrt = NULL; + struct pol_chain *pol; + + for (pol = rt6_pol_list; pol; pol = pol->next) { + struct fib6_node *fn; + struct rt6_info *sprt; + + fn = fib6_lookup(pol->rules, daddr, saddr); + + do { + for (sprt = fn->leaf; sprt; sprt=sprt->u.next) { + int res; + + frule = sprt->rt6i_flowr; +#if RT6_DEBUG >= 2 + if (frule == NULL) { + printk(KERN_DEBUG "NULL flowr\n"); + goto error; + } +#endif + res = frule->ops->accept(rt, sprt, args, &nrt); + + switch (res) { + case FLOWR_SELECT: + goto found; + case FLOWR_CLEAR: + goto next_policy; + 
case FLOWR_NODECISION:
+					break;
+				default:
+					goto error;
+				};
+			}
+
+			fn = fn->parent;
+
+		} while ((fn->fn_flags & RTN_TL_ROOT) == 0);
+
+	next_policy:
+		;
+	}
+
+error:
+	dst_clone(&ip6_null_entry.u.dst);
+	return &ip6_null_entry;
+
+found:
+	if (nrt == NULL)
+		goto error;
+
+	nrt->rt6i_flags |= RTF_CACHE;
+	dst_clone(&nrt->u.dst);
+	{
+		int err = rt6_ins(nrt);
+
+		if (err)
+			nrt->u.dst.error = err;
+	}
+	return nrt;
+}
+#endif
+
+static int fib6_ifdown(struct rt6_info *rt, void *arg)
+{
+	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
+	    rt != &ip6_null_entry) {
+		RT6_TRACE("deleted by ifdown %p\n", rt);
+		return -1;
+	}
+	return 0;
+}
+
+void rt6_ifdown(struct device *dev)
+{
+	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
+}
+
+struct rt6_mtu_change_arg
+{
+	struct device *dev;
+	unsigned mtu;
+};
+
+static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
+
+	/* In IPv6, pmtu discovery is not optional,
+	   so the RTAX_MTU lock cannot disable it.
+	   We still use this lock to block changes
+	   caused by addrconf/ndisc.
+	 */
+	if (rt->rt6i_dev == arg->dev &&
+	    !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
+		rt->u.dst.pmtu = arg->mtu;
+	return 0;
+}
+
+void rt6_mtu_change(struct device *dev, unsigned mtu)
+{
+	struct rt6_mtu_change_arg arg;
+
+	arg.dev = dev;
+	arg.mtu = mtu;
+	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
+}
+
+#ifdef CONFIG_RTNETLINK
+
+static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
+			      struct in6_rtmsg *rtmsg)
+{
+	memset(rtmsg, 0, sizeof(*rtmsg));
+
+	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
+	rtmsg->rtmsg_src_len = r->rtm_src_len;
+	rtmsg->rtmsg_flags = RTF_UP;
+	if (r->rtm_type == RTN_UNREACHABLE)
+		rtmsg->rtmsg_flags |= RTF_REJECT;
+
+	if (rta[RTA_GATEWAY-1]) {
+		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
+		rtmsg->rtmsg_flags |= RTF_GATEWAY;
+	}
+	if (rta[RTA_DST-1]) {
+		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+	}
+	if (rta[RTA_SRC-1]) {
+		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+	}
+	if (rta[RTA_OIF-1]) {
+		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+	}
+	if (rta[RTA_PRIORITY-1]) {
+		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+	}
+	return 0;
+}
+
+int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct in6_rtmsg rtmsg;
+
+	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+		return -EINVAL;
+	return ip6_route_del(&rtmsg);
+}
+
+int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct in6_rtmsg rtmsg;
+
+	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+		return -EINVAL;
+	return ip6_route_add(&rtmsg);
+}
+
+struct rt6_rtnl_dump_arg
+{
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+			 struct in6_addr *dst,
+			 struct in6_addr *src,
+			 int iif,
+			 int type, u32 pid, u32 seq)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
struct rtattr *mx; + struct rta_cacheinfo ci; + + nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET6; + rtm->rtm_dst_len = rt->rt6i_dst.plen; + rtm->rtm_src_len = rt->rt6i_src.plen; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + if (rt->rt6i_flags&RTF_REJECT) + rtm->rtm_type = RTN_UNREACHABLE; + else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) + rtm->rtm_type = RTN_LOCAL; + else + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_BOOT; + if (rt->rt6i_flags&RTF_DYNAMIC) + rtm->rtm_protocol = RTPROT_REDIRECT; + else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK)) + rtm->rtm_protocol = RTPROT_KERNEL; + else if (rt->rt6i_flags&RTF_DEFAULT) + rtm->rtm_protocol = RTPROT_RA; + + if (rt->rt6i_flags&RTF_CACHE) + rtm->rtm_flags |= RTM_F_CLONED; + + if (dst) { + RTA_PUT(skb, RTA_DST, 16, dst); + rtm->rtm_dst_len = 128; + } else if (rtm->rtm_dst_len) + RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); +#ifdef CONFIG_IPV6_SUBTREES + if (src) { + RTA_PUT(skb, RTA_SRC, 16, src); + rtm->rtm_src_len = 128; + } else if (rtm->rtm_src_len) + RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); +#endif + if (iif) + RTA_PUT(skb, RTA_IIF, 4, &iif); + else if (dst) { + struct in6_addr saddr_buf; + if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf)) + RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + } + mx = (struct rtattr*)skb->tail; + RTA_PUT(skb, RTA_METRICS, 0, NULL); + if (rt->u.dst.mxlock) + RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock); + if (rt->u.dst.pmtu) + RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu); + if (rt->u.dst.window) + RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window); + if (rt->u.dst.rtt) + RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt); + mx->rta_len = skb->tail - (u8*)mx; + if (mx->rta_len == RTA_LENGTH(0)) + skb_trim(skb, (u8*)mx - skb->data); + if (rt->u.dst.neighbour) + RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->u.dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); + RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); + ci.rta_lastuse = jiffies - rt->u.dst.lastuse; + if (rt->rt6i_expires) + ci.rta_expires = rt->rt6i_expires - jiffies; + else + ci.rta_expires = 0; + ci.rta_used = atomic_read(&rt->u.dst.refcnt); + ci.rta_clntref = atomic_read(&rt->u.dst.use); + ci.rta_error = rt->u.dst.error; + RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int rt6_dump_route(struct rt6_info *rt, void *p_arg) +{ + struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; + + return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, + NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq); +} + +static int fib6_dump_node(struct fib6_walker_t *w) +{ + int res; + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->u.next) { + res = rt6_dump_route(rt, w->args); + if (res < 0) { + /* Frame is full, suspend walking */ + w->leaf = rt; + return 1; + } + BUG_TRAP(res!=0); + } + w->leaf = NULL; + return 0; +} + +static int fib6_dump_done(struct netlink_callback *cb) +{ + struct fib6_walker_t *w = (void*)cb->args[0]; + + if (w) { + cb->args[0] = 0; + start_bh_atomic(); + fib6_walker_unlink(w); + end_bh_atomic(); + kfree(w); + } + if (cb->args[1]) { + cb->done = (void*)cb->args[1]; + cb->args[1] = 0; + } + return cb->done(cb); +} + 
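+/*
+ * Dump the FIB over rtnetlink.  The walker is the resumption state:
+ * the first call allocates it and stashes it in cb->args[0], and each
+ * later call resumes with fib6_walk_continue() until the whole tree
+ * has been emitted; fib6_dump_done() finally unlinks and frees it.
+ */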
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rt6_rtnl_dump_arg arg; + struct fib6_walker_t *w; + int res; + + arg.skb = skb; + arg.cb = cb; + + w = (void*)cb->args[0]; + if (w == NULL) { + /* New dump: + * + * 1. hook callback destructor. + */ + cb->args[1] = (long)cb->done; + cb->done = fib6_dump_done; + + /* + * 2. allocate and initialize walker. + */ + w = kmalloc(sizeof(*w), GFP_KERNEL); + if (w == NULL) + return -ENOMEM; + RT6_TRACE("dump<%p", w); + memset(w, 0, sizeof(*w)); + w->root = &ip6_routing_table; + w->func = fib6_dump_node; + w->args = &arg; + cb->args[0] = (long)w; + start_bh_atomic(); + res = fib6_walk(w); + end_bh_atomic(); + } else { + w->args = &arg; + start_bh_atomic(); + res = fib6_walk_continue(w); + end_bh_atomic(); + } +#if RT6_DEBUG >= 3 + if (res <= 0 && skb->len == 0) + RT6_TRACE("%p>dump end\n", w); +#endif + /* res < 0 is an error. (really, impossible) + res == 0 means that dump is complete, but skb still can contain data. + res > 0 dump is not complete, but frame is full. + */ + return res < 0 ? res : skb->len; +} + +int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + int iif = 0; + int err; + struct sk_buff *skb; + struct flowi fl; + struct rt6_info *rt; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOBUFS; + + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. + */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); + + fl.proto = 0; + fl.nl_u.ip6_u.daddr = NULL; + fl.nl_u.ip6_u.saddr = NULL; + fl.uli_u.icmpt.type = 0; + fl.uli_u.icmpt.code = 0; + if (rta[RTA_SRC-1]) + fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]); + if (rta[RTA_DST-1]) + fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]); + + if (rta[RTA_IIF-1]) + memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + + if (iif) { + struct device *dev; + dev = dev_get_by_index(iif); + if (!dev) + return -ENODEV; + } + + fl.oif = 0; + if (rta[RTA_OIF-1]) + memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + + rt = (struct rt6_info*)ip6_route_output(NULL, &fl); + + skb->dst = &rt->u.dst; + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = rt6_fill_node(skb, rt, + fl.nl_u.ip6_u.daddr, + fl.nl_u.ip6_u.saddr, + iif, + RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq); + if (err < 0) + return -EMSGSIZE; + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err < 0) + return err; + return 0; +} + +void inet6_rt_notify(int event, struct rt6_info *rt) +{ + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, gfp_any()); + if (!skb) { + netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); + return; + } + if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; + netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); +} + +#endif + +/* + * /proc + */ + +#ifdef CONFIG_PROC_FS + +#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) + +struct rt6_proc_arg +{ + char *buffer; + int offset; + int length; + int skip; + int len; +}; + +static int rt6_info_route(struct rt6_info *rt, void *p_arg) +{ + struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; + int i; + + if (arg->skip < arg->offset / RT6_INFO_LEN) { + arg->skip++; + return 0; + } 
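+	/* Every route below is printed as one fixed-size RT6_INFO_LEN
+	   record; that is what lets the offset arithmetic here and in
+	   rt6_proc_info() skip whole records. */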
+ + if (arg->len >= arg->length) + return 0; + + for (i=0; i<16; i++) { + sprintf(arg->buffer + arg->len, "%02x", + rt->rt6i_dst.addr.s6_addr[i]); + arg->len += 2; + } + arg->len += sprintf(arg->buffer + arg->len, " %02x ", + rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + for (i=0; i<16; i++) { + sprintf(arg->buffer + arg->len, "%02x", + rt->rt6i_src.addr.s6_addr[i]); + arg->len += 2; + } + arg->len += sprintf(arg->buffer + arg->len, " %02x ", + rt->rt6i_src.plen); +#else + sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000 00 "); + arg->len += 36; +#endif + + if (rt->rt6i_nexthop) { + for (i=0; i<16; i++) { + sprintf(arg->buffer + arg->len, "%02x", + rt->rt6i_nexthop->primary_key[i]); + arg->len += 2; + } + } else { + sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000"); + arg->len += 32; + } + arg->len += sprintf(arg->buffer + arg->len, + " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->u.dst.use), + atomic_read(&rt->u.dst.refcnt), rt->rt6i_flags, + rt->rt6i_dev ? rt->rt6i_dev->name : ""); + return 0; +} + +static int rt6_proc_info(char *buffer, char **start, off_t offset, int length, + int dummy) +{ + struct rt6_proc_arg arg; + arg.buffer = buffer; + arg.offset = offset; + arg.length = length; + arg.skip = 0; + arg.len = 0; + + fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); + + *start = buffer; + if (offset) + *start += offset % RT6_INFO_LEN; + + arg.len -= offset % RT6_INFO_LEN; + + if (arg.len > length) + arg.len = length; + if (arg.len < 0) + arg.len = 0; + + return arg.len; +} + +extern struct rt6_statistics rt6_stats; + +static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length, + int dummy) +{ + int len; + + len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n", + rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, + rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, + rt6_stats.fib_rt_cache, + atomic_read(&ip6_dst_ops.entries)); + + len -= offset; + + if (len > length) + len = length; + if(len < 0) + len = 0; + + *start = buffer + offset; + + return len; +} + +static struct proc_dir_entry proc_rt6_info = { + PROC_NET_RT6, 10, "ipv6_route", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rt6_proc_info +}; +static struct proc_dir_entry proc_rt6_stats = { + PROC_NET_RT6_STATS, 9, "rt6_stats", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rt6_proc_stats +}; +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_SYSCTL + +static int flush_delay; + +static +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + if (write) { + proc_dointvec(ctl, write, filp, buffer, lenp); + if (flush_delay < 0) + flush_delay = 0; + start_bh_atomic(); + fib6_run_gc((unsigned long)flush_delay); + end_bh_atomic(); + return 0; + } else + return -EINVAL; +} + +ctl_table ipv6_route_table[] = { + {NET_IPV6_ROUTE_FLUSH, "flush", + &flush_delay, sizeof(int), 0644, NULL, + &ipv6_sysctl_rtcache_flush}, + {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh", + &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV6_ROUTE_MAX_SIZE, "max_size", + &ip6_rt_max_size, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval", + &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout", + &ip6_rt_gc_timeout, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval", + &ip6_rt_gc_interval, sizeof(int), 
0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity", + &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires", + &ip6_rt_mtu_expires, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {0} +}; + +#endif + + +__initfunc(void ip6_route_init(void)) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_rt6_info); + proc_net_register(&proc_rt6_stats); +#endif +#ifdef CONFIG_IPV6_NETLINK + netlink_attach(NETLINK_ROUTE6, rt6_msgrcv); +#endif +} + +#ifdef MODULE +void ip6_route_cleanup(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_unregister(PROC_NET_RT6); + proc_net_unregister(PROC_NET_RT6_STATS); +#endif +#ifdef CONFIG_IPV6_NETLINK + netlink_detach(NETLINK_ROUTE6); +#endif + rt6_ifdown(NULL); + fib6_gc_cleanup(); +} +#endif /* MODULE */ diff --git a/pfinet/linux-src/net/ipv6/tcp_ipv6.c b/pfinet/linux-src/net/ipv6/tcp_ipv6.c new file mode 100644 index 00000000..e9d1d079 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/tcp_ipv6.c @@ -0,0 +1,1741 @@ +/* + * TCP over IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * $Id: tcp_ipv6.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * Based on: + * linux/net/ipv4/tcp.c + * linux/net/ipv4/tcp_input.c + * linux/net/ipv4/tcp_output.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/sched.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/init.h> + +#include <linux/ipv6.h> +#include <linux/icmpv6.h> +#include <linux/random.h> + +#include <net/tcp.h> +#include <net/ndisc.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> + +#include <asm/uaccess.h> + +extern int sysctl_max_syn_backlog; + +static void tcp_v6_send_reset(struct sk_buff *skb); +static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb); + +static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); +static void tcp_v6_xmit(struct sk_buff *skb); +static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, + struct ipv6hdr *ip6h, + struct tcphdr *th, + int iif, + struct open_request **prevp); + +static struct tcp_func ipv6_mapped; +static struct tcp_func ipv6_specific; + +/* I have no idea if this is a good hash for v6 or not. -DaveM */ +static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, + struct in6_addr *faddr, u16 fport) +{ + int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); + return (hashent & ((tcp_ehash_size/2) - 1)); +} + +static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) +{ + struct in6_addr *laddr = &sk->net_pinfo.af_inet6.rcv_saddr; + struct in6_addr *faddr = &sk->net_pinfo.af_inet6.daddr; + __u16 lport = sk->num; + __u16 fport = sk->dport; + return tcp_v6_hashfn(laddr, lport, faddr, fport); +} + +/* Grrr, addr_type already calculated by caller, but I don't want + * to add some silly "cookie" argument to this method just for that. + * But it doesn't matter, the recalculation is in the rarest path + * this function ever takes. 
+ */
+static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+	struct tcp_bind_bucket *tb;
+
+	SOCKHASH_LOCK();
+	if (snum == 0) {
+		int rover = tcp_port_rover;
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int remaining = (high - low) + 1;
+
+		do {	rover++;
+			if ((rover < low) || (rover > high))
+				rover = low;
+			tb = tcp_bhash[tcp_bhashfn(rover)];
+			for ( ; tb; tb = tb->next)
+				if (tb->port == rover)
+					goto next;
+			break;
+		next:;
+		} while (--remaining > 0);
+		tcp_port_rover = rover;
+
+		/* Exhausted local port range during search? */
+		if (remaining <= 0)
+			goto fail;
+
+		/* OK, here is the one we will use. */
+		snum = rover;
+		tb = NULL;
+	} else {
+		for (tb = tcp_bhash[tcp_bhashfn(snum)];
+		     tb != NULL;
+		     tb = tb->next)
+			if (tb->port == snum)
+				break;
+	}
+	if (tb != NULL && tb->owners != NULL) {
+		if (tb->fastreuse != 0 && sk->reuse != 0) {
+			goto success;
+		} else {
+			struct sock *sk2 = tb->owners;
+			int sk_reuse = sk->reuse;
+			int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
+
+			for( ; sk2 != NULL; sk2 = sk2->bind_next) {
+				if (sk->bound_dev_if == sk2->bound_dev_if) {
+					if (!sk_reuse ||
+					    !sk2->reuse ||
+					    sk2->state == TCP_LISTEN) {
+						/* NOTE: IPv6 tw buckets have a different format */
+						if (!sk2->rcv_saddr ||
+						    addr_type == IPV6_ADDR_ANY ||
+						    !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+								   sk2->state != TCP_TIME_WAIT ?
+								   &sk2->net_pinfo.af_inet6.rcv_saddr :
+								   &((struct tcp_tw_bucket *)sk2)->v6_rcv_saddr))
+							break;
+					}
+				}
+			}
+			/* If we found a conflict, fail. */
+			if (sk2 != NULL)
+				goto fail;
+		}
+	}
+	if (tb == NULL &&
+	    (tb = tcp_bucket_create(snum)) == NULL)
+		goto fail;
+	if (tb->owners == NULL) {
+		if (sk->reuse && sk->state != TCP_LISTEN)
+			tb->fastreuse = 1;
+		else
+			tb->fastreuse = 0;
+	} else if (tb->fastreuse &&
+		   ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+		tb->fastreuse = 0;
+
+success:
+	sk->num = snum;
+	if ((sk->bind_next = tb->owners) != NULL)
+		tb->owners->bind_pprev = &sk->bind_next;
+	tb->owners = sk;
+	sk->bind_pprev = &tb->owners;
+	sk->prev = (struct sock *) tb;
+
+	SOCKHASH_UNLOCK();
+	return 0;
+
+fail:
+	SOCKHASH_UNLOCK();
+	return 1;
+}
+
+static void tcp_v6_hash(struct sock *sk)
+{
+	if(sk->state != TCP_CLOSE) {
+		struct sock **skp;
+
+		/* Well, I know that it is ugly...
+		 * All this ->prot, ->af_specific etc. 
need LARGE cleanup --ANK + */ + if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { + tcp_prot.hash(sk); + return; + } + + if(sk->state == TCP_LISTEN) + skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + else + skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))]; + + SOCKHASH_LOCK(); + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); + } +} + +static void tcp_v6_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if(sk->pprev) { + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + tcp_reg_zap(sk); + __tcp_put_port(sk); + } + SOCKHASH_UNLOCK(); +} + +static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) +{ + struct sock *sk; + struct sock *result = NULL; + int score, hiscore; + + hiscore=0; + sk = tcp_listening_hash[tcp_lhashfn(hnum)]; + for(; sk; sk = sk->next) { + if((sk->num == hnum) && (sk->family == PF_INET6)) { + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + score = 1; + if(!ipv6_addr_any(&np->rcv_saddr)) { + if(ipv6_addr_cmp(&np->rcv_saddr, daddr)) + continue; + score++; + } + if (sk->bound_dev_if) { + if (sk->bound_dev_if != dif) + continue; + score++; + } + if (score == 3) + return sk; + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + return result; +} + +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * It is assumed that this code only gets called from within NET_BH. + */ +static inline struct sock *__tcp_v6_lookup(struct tcphdr *th, + struct in6_addr *saddr, u16 sport, + struct in6_addr *daddr, u16 dport, + int dif) +{ + struct sock *sk; + __u16 hnum = ntohs(dport); + __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + int hash; + + /* Check TCP register quick cache first. */ + sk = TCP_RHASH(sport); + if(sk && TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) + goto hit; + + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); + for(sk = tcp_ehash[hash]; sk; sk = sk->next) { + /* For IPV6 do the cheaper port and family tests first. */ + if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) { + if (sk->state == TCP_ESTABLISHED) + TCP_RHASH(sport) = sk; + goto hit; /* You sunk my battleship! */ + } + } + /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ + for(sk = tcp_ehash[hash+(tcp_ehash_size/2)]; sk; sk = sk->next) { + if(*((__u32 *)&(sk->dport)) == ports && + sk->family == PF_INET6) { + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) && + !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) && + (!sk->bound_dev_if || sk->bound_dev_if == dif)) + goto hit; + } + } + sk = tcp_v6_lookup_listener(daddr, hnum, dif); +hit: + return sk; +} + +#define tcp_v6_lookup(sa, sp, da, dp, dif) __tcp_v6_lookup((0),(sa),(sp),(da),(dp),(dif)) + +static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, + struct in6_addr *saddr, + struct in6_addr *daddr, + unsigned long base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); +} + +static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +{ + __u32 si; + __u32 di; + + if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + si = skb->nh.ipv6h->saddr.s6_addr32[3]; + di = skb->nh.ipv6h->daddr.s6_addr32[3]; + } else { + si = skb->nh.iph->saddr; + di = skb->nh.iph->daddr; + } + + return secure_tcp_sequence_number(di, si, + skb->h.th->dest, + skb->h.th->source); +} + +static int tcp_v6_unique_address(struct sock *sk) +{ + struct tcp_bind_bucket *tb; + unsigned short snum = sk->num; + int retval = 1; + + /* Freeze the hash while we snoop around. */ + SOCKHASH_LOCK(); + tb = tcp_bhash[tcp_bhashfn(snum)]; + for(; tb; tb = tb->next) { + if(tb->port == snum && tb->owners != NULL) { + /* Almost certainly the re-use port case, search the real hashes + * so it actually scales. (we hope that all ipv6 ftp servers will + * use passive ftp, I just cover this case for completeness) + */ + sk = __tcp_v6_lookup(NULL, &sk->net_pinfo.af_inet6.daddr, + sk->dport, + &sk->net_pinfo.af_inet6.rcv_saddr, + htons(snum), + sk->bound_dev_if); + if((sk != NULL) && (sk->state != TCP_LISTEN)) + retval = 0; + break; + } + } + SOCKHASH_UNLOCK(); + return retval; +} + +static __inline__ int tcp_v6_iif(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb; + return opt->iif; +} + +static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct in6_addr *saddr = NULL; + struct in6_addr saddr_buf; + struct flowi fl; + struct dst_entry *dst; + struct sk_buff *buff; + int addr_type; + int err; + + if (sk->state != TCP_CLOSE) + return(-EISCONN); + + /* + * Don't allow a double connect. + */ + + if(!ipv6_addr_any(&np->daddr)) + return -EINVAL; + + if (addr_len < sizeof(struct sockaddr_in6)) + return(-EINVAL); + + if (usin->sin6_family && usin->sin6_family != AF_INET6) + return(-EAFNOSUPPORT); + + fl.fl6_flowlabel = 0; + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + struct ip6_flowlabel *flowlabel; + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + fl6_sock_release(flowlabel); + } + } + + /* + * connect() to INADDR_ANY means loopback (BSD'ism). 
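+	 *	(The unspecified address is rewritten to ::1 just below,
+	 *	before addr_type is computed from it.)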
+ */ + + if(ipv6_addr_any(&usin->sin6_addr)) + usin->sin6_addr.s6_addr[15] = 0x1; + + addr_type = ipv6_addr_type(&usin->sin6_addr); + + if(addr_type & IPV6_ADDR_MULTICAST) + return -ENETUNREACH; + + /* + * connect to self not allowed + */ + + if (ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) == 0 && + usin->sin6_port == sk->sport) + return (-EINVAL); + + memcpy(&np->daddr, &usin->sin6_addr, sizeof(struct in6_addr)); + np->flow_label = fl.fl6_flowlabel; + + /* + * TCP over IPv4 + */ + + if (addr_type == IPV6_ADDR_MAPPED) { + u32 exthdrlen = tp->ext_header_len; + struct sockaddr_in sin; + + SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + + sin.sin_family = AF_INET; + sin.sin_port = usin->sin6_port; + sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; + + sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped; + sk->backlog_rcv = tcp_v4_do_rcv; + + err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + + if (err) { + tp->ext_header_len = exthdrlen; + sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; + sk->backlog_rcv = tcp_v6_do_rcv; + goto failure; + } else { + ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF), + sk->saddr); + ipv6_addr_set(&np->rcv_saddr, 0, 0, __constant_htonl(0x0000FFFF), + sk->rcv_saddr); + } + + return err; + } + + if (!ipv6_addr_any(&np->rcv_saddr)) + saddr = &np->rcv_saddr; + + fl.proto = IPPROTO_TCP; + fl.fl6_dst = &np->daddr; + fl.fl6_src = saddr; + fl.oif = sk->bound_dev_if; + fl.uli_u.ports.dport = usin->sin6_port; + fl.uli_u.ports.sport = sk->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + fl.nl_u.ip6_u.daddr = rt0->addr; + } + + dst = ip6_route_output(sk, &fl); + + if ((err = dst->error) != 0) { + dst_release(dst); + goto failure; + } + + if (fl.oif == 0 && addr_type&IPV6_ADDR_LINKLOCAL) { + /* Ough! This guy tries to connect to link local + * address and did not specify interface. 
+ * Actually we should kick him out, but + * we will be patient :) --ANK + */ + sk->bound_dev_if = dst->dev->ifindex; + } + + ip6_dst_store(sk, dst, NULL); + + if (saddr == NULL) { + err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf); + if (err) + goto failure; + + saddr = &saddr_buf; + } + + /* set the source address */ + ipv6_addr_copy(&np->rcv_saddr, saddr); + ipv6_addr_copy(&np->saddr, saddr); + + tp->ext_header_len = 0; + if (np->opt) + tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen; + /* Reset mss clamp */ + tp->mss_clamp = ~0; + + err = -ENOBUFS; + buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header), + 0, GFP_KERNEL); + + if (buff == NULL) + goto failure; + + sk->dport = usin->sin6_port; + + if (!tcp_v6_unique_address(sk)) { + kfree_skb(buff); + err = -EADDRNOTAVAIL; + goto failure; + } + + /* + * Init variables + */ + + tp->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3], + np->daddr.s6_addr32[3], + sk->sport, sk->dport); + + tcp_connect(sk, buff, dst->pmtu); + + return 0; + +failure: + dst_release(xchg(&sk->dst_cache, NULL)); + memset(&np->daddr, 0, sizeof(struct in6_addr)); + sk->daddr = 0; + return err; +} + +static int tcp_v6_sendmsg(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int retval = -EINVAL; + + /* + * Do sanity checking for sendmsg/sendto/send + */ + + if (msg->msg_flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL)) + goto out; + if (msg->msg_name) { + struct sockaddr_in6 *addr=(struct sockaddr_in6 *)msg->msg_name; + + if (msg->msg_namelen < sizeof(*addr)) + goto out; + + if (addr->sin6_family && addr->sin6_family != AF_INET6) + goto out; + retval = -ENOTCONN; + + if(sk->state == TCP_CLOSE) + goto out; + retval = -EISCONN; + if (addr->sin6_port != sk->dport) + goto out; + if (ipv6_addr_cmp(&addr->sin6_addr, &np->daddr)) + goto out; + if (np->sndflow && np->flow_label != (addr->sin6_flowinfo&IPV6_FLOWINFO_MASK)) + goto out; + } + + retval = tcp_do_sendmsg(sk, msg); + +out: + return retval; +} + +void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr, + struct inet6_skb_parm *opt, + int type, int code, unsigned char *header, __u32 info) +{ + struct in6_addr *saddr = &hdr->saddr; + struct in6_addr *daddr = &hdr->daddr; + struct tcphdr *th = (struct tcphdr *)header; + struct ipv6_pinfo *np; + struct sock *sk; + int err; + struct tcp_opt *tp; + __u32 seq; + + if (header + 8 > skb->tail) + return; + + sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex); + + if (sk == NULL || sk->state == TCP_TIME_WAIT) { + /* XXX: Update ICMP error count */ + return; + } + + tp = &sk->tp_pinfo.af_tcp; + seq = ntohl(th->seq); + if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + net_statistics.OutOfWindowIcmps++; + return; + } + + np = &sk->net_pinfo.af_inet6; + if (type == ICMPV6_PKT_TOOBIG) { + struct dst_entry *dst = NULL; + + if (atomic_read(&sk->sock_readers)) + return; + + if (sk->state == TCP_LISTEN) + return; + + /* icmp should have updated the destination cache entry */ + if (sk->dst_cache) + dst = dst_check(&sk->dst_cache, np->dst_cookie); + + if (dst == NULL) { + struct flowi fl; + + /* BUGGG_FUTURE: Again, it is not clear how + to handle rthdr case. Ignore this complexity + for now. 
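+			   (Presumably: with a routing header the first
+			   hop differs from np->daddr; tcp_v6_connect()
+			   substitutes rt0->addr in that case, so the
+			   flow built below would name the wrong next hop.)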
+ */ + fl.proto = IPPROTO_TCP; + fl.nl_u.ip6_u.daddr = &np->daddr; + fl.nl_u.ip6_u.saddr = &np->saddr; + fl.oif = sk->bound_dev_if; + fl.uli_u.ports.dport = sk->dport; + fl.uli_u.ports.sport = sk->sport; + + dst = ip6_route_output(sk, &fl); + } else + dst = dst_clone(dst); + + if (dst->error) { + sk->err_soft = -dst->error; + } else if (tp->pmtu_cookie > dst->pmtu) { + tcp_sync_mss(sk, dst->pmtu); + tcp_simple_retransmit(sk); + } /* else let the usual retransmit timer handle it */ + dst_release(dst); + return; + } + + icmpv6_err_convert(type, code, &err); + + /* Might be for an open_request */ + switch (sk->state) { + struct open_request *req, *prev; + struct ipv6hdr hd; + case TCP_LISTEN: + if (atomic_read(&sk->sock_readers)) { + net_statistics.LockDroppedIcmps++; + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently. + */ + return; + } + + /* Grrrr - fix this later. */ + ipv6_addr_copy(&hd.saddr, saddr); + ipv6_addr_copy(&hd.daddr, daddr); + req = tcp_v6_search_req(tp, &hd, th, tcp_v6_iif(skb), &prev); + if (!req) + return; + if (seq != req->snt_isn) { + net_statistics.OutOfWindowIcmps++; + return; + } + if (req->sk) { + sk = req->sk; /* report error in accept */ + } else { + tp->syn_backlog--; + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); + } + /* FALL THROUGH */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: /* Cannot happen */ + tcp_statistics.TcpAttemptFails++; + sk->err = err; + sk->zapped = 1; + mb(); + sk->error_report(sk); + return; + } + + if (np->recverr) { + /* This code isn't serialized with the socket code */ + /* ANK (980927) ... which is harmless now, + sk->err's may be safely lost. + */ + sk->err = err; + mb(); + sk->error_report(sk); + } else { + sk->err_soft = err; + mb(); + } +} + + +static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) +{ + struct sk_buff * skb; + struct dst_entry *dst; + struct ipv6_txoptions *opt = NULL; + struct flowi fl; + int mss; + + fl.proto = IPPROTO_TCP; + fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr; + fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr; + fl.fl6_flowlabel = 0; + fl.oif = req->af.v6_req.iif; + fl.uli_u.ports.dport = req->rmt_port; + fl.uli_u.ports.sport = sk->sport; + + opt = sk->net_pinfo.af_inet6.opt; + if (opt == NULL && + sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 && + req->af.v6_req.pktopts) { + struct sk_buff *pktopts = req->af.v6_req.pktopts; + struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb; + if (rxopt->srcrt) + opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt)); + } + + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + fl.nl_u.ip6_u.daddr = rt0->addr; + } + + dst = ip6_route_output(sk, &fl); + if (dst->error) + goto done; + + mss = dst->pmtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr); + + skb = tcp_make_synack(sk, dst, req, mss); + if (skb) { + struct tcphdr *th = skb->h.th; + + th->check = tcp_v6_check(th, skb->len, + &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr, + csum_partial((char *)th, skb->len, skb->csum)); + + fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr; + ip6_xmit(sk, skb, &fl, opt); + } + +done: + dst_release(dst); + if (opt && opt != sk->net_pinfo.af_inet6.opt) + sock_kfree_s(sk, opt, opt->tot_len); +} + +static void tcp_v6_or_free(struct open_request *req) +{ + if (req->af.v6_req.pktopts) { + kfree_skb(req->af.v6_req.pktopts); + req->af.v6_req.pktopts = NULL; + } +} + +static struct or_calltable or_ipv6 = { + 
tcp_v6_send_synack,
+	tcp_v6_or_free,
+	tcp_v6_send_reset
+};
+
+static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+
+	if (sk->net_pinfo.af_inet6.rxopt.all) {
+		if ((opt->hop && sk->net_pinfo.af_inet6.rxopt.bits.hopopts) ||
+		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
+		     sk->net_pinfo.af_inet6.rxopt.bits.rxflow) ||
+		    (opt->srcrt && sk->net_pinfo.af_inet6.rxopt.bits.srcrt) ||
+		    ((opt->dst1 || opt->dst0) && sk->net_pinfo.af_inet6.rxopt.bits.dstopts))
+			return 1;
+	}
+	return 0;
+}
+
+
+#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
+#define BACKLOGMAX(sk) sysctl_max_syn_backlog
+
+/* FIXME: this is substantially similar to the ipv4 code.
+ * Can some kind of merge be done? -- erics
+ */
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn)
+{
+	struct tcp_opt tp;
+	struct open_request *req;
+
+	/* If the socket is dead, don't accept the connection. */
+	if (sk->dead) {
+		SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n", sk);
+		tcp_statistics.TcpAttemptFails++;
+		return -ENOTCONN;
+	}
+
+	if (skb->protocol == __constant_htons(ETH_P_IP))
+		return tcp_v4_conn_request(sk, skb, isn);
+
+	/* FIXME: do the same check for anycast */
+	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+		goto drop;
+
+	if (isn == 0)
+		isn = tcp_v6_init_sequence(sk,skb);
+
+	/*
+	 *	There are no SYN attacks on IPv6, yet...
+	 */
+	if (BACKLOG(sk) >= BACKLOGMAX(sk)) {
+		(void)(net_ratelimit() &&
+		       printk(KERN_INFO "dropping syn ack:%d max:%d\n",
+			      BACKLOG(sk), BACKLOGMAX(sk)));
+		goto drop;
+	}
+
+	req = tcp_openreq_alloc();
+	if (req == NULL) {
+		goto drop;
+	}
+
+	BACKLOG(sk)++;
+
+	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+
+	req->rcv_isn = TCP_SKB_CB(skb)->seq;
+	req->snt_isn = isn;
+	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+	tp.mss_clamp = 65535;
+	tcp_parse_options(NULL, skb->h.th, &tp, 0);
+	if (tp.mss_clamp == 65535)
+		tp.mss_clamp = 576 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
+	if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp)
+		tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss;
+
+	req->mss = tp.mss_clamp;
+	if (tp.saw_tstamp)
+		req->ts_recent = tp.rcv_tsval;
+	req->tstamp_ok = tp.tstamp_ok;
+	req->sack_ok = tp.sack_ok;
+	req->snd_wscale = tp.snd_wscale;
+	req->wscale_ok = tp.wscale_ok;
+	req->rmt_port = skb->h.th->source;
+	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+	req->af.v6_req.pktopts = NULL;
+	if (ipv6_opt_accepted(sk, skb)) {
+		atomic_inc(&skb->users);
+		req->af.v6_req.pktopts = skb;
+	}
+	req->af.v6_req.iif = sk->bound_dev_if;
+
+	/* So that link locals have meaning */
+	if (!sk->bound_dev_if && ipv6_addr_type(&req->af.v6_req.rmt_addr)&IPV6_ADDR_LINKLOCAL)
+		req->af.v6_req.iif = tcp_v6_iif(skb);
+
+	req->class = &or_ipv6;
+	req->retrans = 0;
+	req->sk = NULL;
+
+	tcp_v6_send_synack(sk, req);
+
+	req->expires = jiffies + TCP_TIMEOUT_INIT;
+	tcp_inc_slow_timer(TCP_SLT_SYNACK);
+	tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+
+	return 0;
+
+drop:
+	tcp_statistics.TcpAttemptFails++;
+	return 0; /* don't send reset */
+}
+
+static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
+			      struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+	th->check = 0;
+
+	th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
+				    csum_partial((char *)th, th->doff<<2,
+						 
skb->csum));
+}
+
+static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+					  struct open_request *req,
+					  struct dst_entry *dst)
+{
+	struct ipv6_pinfo *np;
+	struct flowi fl;
+	struct tcp_opt *newtp;
+	struct sock *newsk;
+	struct ipv6_txoptions *opt;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		/*
+		 *	v6 mapped
+		 */
+
+		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+
+		if (newsk == NULL)
+			return NULL;
+
+		np = &newsk->net_pinfo.af_inet6;
+
+		ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF),
+			      newsk->daddr);
+
+		ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF),
+			      newsk->saddr);
+
+		ipv6_addr_copy(&np->rcv_saddr, &np->saddr);
+
+		newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
+		newsk->backlog_rcv = tcp_v4_do_rcv;
+		newsk->net_pinfo.af_inet6.pktoptions = NULL;
+		newsk->net_pinfo.af_inet6.opt = NULL;
+
+		/* This is a tricky place. Until this moment IPv4 tcp
+		   worked with the IPv6 af_tcp.af_specific.
+		   Sync it now.
+		 */
+		tcp_sync_mss(newsk, newsk->tp_pinfo.af_tcp.pmtu_cookie);
+
+		return newsk;
+	}
+
+	opt = sk->net_pinfo.af_inet6.opt;
+
+	if (sk->ack_backlog > sk->max_ack_backlog)
+		goto out;
+
+	if (sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
+	    opt == NULL && req->af.v6_req.pktopts) {
+		struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
+		if (rxopt->srcrt)
+			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+	}
+
+	if (dst == NULL) {
+		fl.proto = IPPROTO_TCP;
+		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+		if (opt && opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+			fl.nl_u.ip6_u.daddr = rt0->addr;
+		}
+		fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
+		fl.fl6_flowlabel = 0;
+		fl.oif = sk->bound_dev_if;
+		fl.uli_u.ports.dport = req->rmt_port;
+		fl.uli_u.ports.sport = sk->sport;
+
+		dst = ip6_route_output(sk, &fl);
+	}
+
+	if (dst->error)
+		goto out;
+
+	sk->tp_pinfo.af_tcp.syn_backlog--;
+	sk->ack_backlog++;
+
+	newsk = tcp_create_openreq_child(sk, req, skb);
+	if (newsk == NULL)
+		goto out;
+
+	ip6_dst_store(newsk, dst, NULL);
+
+	newtp = &(newsk->tp_pinfo.af_tcp);
+
+	np = &newsk->net_pinfo.af_inet6;
+	ipv6_addr_copy(&np->daddr, &req->af.v6_req.rmt_addr);
+	ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
+	ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
+	newsk->bound_dev_if = req->af.v6_req.iif;
+
+	/* Now IPv6 options...
+
+	   First: no IPv4 options.
+	 */
+	newsk->opt = NULL;
+
+	/* Clone RX bits */
+	np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;
+
+	/* Clone pktoptions received with SYN */
+	np->pktoptions = req->af.v6_req.pktopts;
+	if (np->pktoptions)
+		atomic_inc(&np->pktoptions->users);
+	np->opt = NULL;
+
+	/* Clone native IPv6 options from listening socket (if any)
+
+	   Yes, keeping a reference count would be much more clever,
+	   but we do one more thing here: we reattach the optmem
+	   to newsk. 
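+	   (ipv6_dup_options() below charges the copy to newsk; the
+	   inverted rthdr built for the listener, if any, is freed
+	   again with sock_kfree_s() since only the duplicate is kept.)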
+ */ + if (opt) { + np->opt = ipv6_dup_options(newsk, opt); + if (opt != sk->net_pinfo.af_inet6.opt) + sock_kfree_s(sk, opt, opt->tot_len); + } + + newtp->ext_header_len = 0; + if (np->opt) + newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen; + + tcp_sync_mss(newsk, dst->pmtu); + newtp->rcv_mss = newtp->mss_clamp; + + newsk->daddr = LOOPBACK4_IPV6; + newsk->saddr = LOOPBACK4_IPV6; + newsk->rcv_saddr= LOOPBACK4_IPV6; + + newsk->prot->hash(newsk); + tcp_inherit_port(sk, newsk); + add_to_prot_sklist(newsk); + sk->data_ready(sk, 0); /* Deliver SIGIO */ + + return newsk; + +out: + if (opt && opt != sk->net_pinfo.af_inet6.opt) + sock_kfree_s(sk, opt, opt->tot_len); + dst_release(dst); + return NULL; +} + +static void tcp_v6_send_reset(struct sk_buff *skb) +{ + struct tcphdr *th = skb->h.th, *t1; + struct sk_buff *buff; + struct flowi fl; + + if (th->rst) + return; + + if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) + return; + + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. + */ + + buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC); + if (buff == NULL) + return; + + skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr)); + + t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr)); + + /* Swap the send and the receive. */ + memset(t1, 0, sizeof(*t1)); + t1->dest = th->source; + t1->source = th->dest; + t1->doff = sizeof(*t1)/4; + t1->rst = 1; + + if(th->ack) { + t1->seq = th->ack_seq; + } else { + t1->ack = 1; + if(!th->syn) + t1->ack_seq = th->seq; + else + t1->ack_seq = htonl(ntohl(th->seq)+1); + } + + buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); + + fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr; + fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr; + fl.fl6_flowlabel = 0; + + t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr, + fl.nl_u.ip6_u.daddr, + sizeof(*t1), IPPROTO_TCP, + buff->csum); + + fl.proto = IPPROTO_TCP; + fl.oif = tcp_v6_iif(skb); + fl.uli_u.ports.dport = t1->dest; + fl.uli_u.ports.sport = t1->source; + + /* sk = NULL, but it is safe for now. RST socket required. */ + buff->dst = ip6_route_output(NULL, &fl); + + if (buff->dst->error == 0) { + ip6_xmit(NULL, buff, &fl, NULL); + tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutRsts++; + return; + } + + kfree_skb(buff); +} + +static void tcp_v6_send_ack(struct sk_buff *skb, __u32 seq, __u32 ack, __u16 window) +{ + struct tcphdr *th = skb->h.th, *t1; + struct sk_buff *buff; + struct flowi fl; + + buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC); + if (buff == NULL) + return; + + skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr)); + + t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr)); + + /* Swap the send and the receive. */ + memset(t1, 0, sizeof(*t1)); + t1->dest = th->source; + t1->source = th->dest; + t1->doff = sizeof(*t1)/4; + t1->ack = 1; + t1->seq = seq; + t1->ack_seq = ack; + + t1->window = htons(window); + + buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); + + fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr; + fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr; + fl.fl6_flowlabel = 0; + + t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr, + fl.nl_u.ip6_u.daddr, + sizeof(*t1), IPPROTO_TCP, + buff->csum); + + fl.proto = IPPROTO_TCP; + fl.oif = tcp_v6_iif(skb); + fl.uli_u.ports.dport = t1->dest; + fl.uli_u.ports.sport = t1->source; + + /* sk = NULL, but it is safe for now. static socket required. 
*/ + buff->dst = ip6_route_output(NULL, &fl); + + if (buff->dst->error == 0) { + ip6_xmit(NULL, buff, &fl, NULL); + tcp_statistics.TcpOutSegs++; + return; + } + + kfree_skb(buff); +} + +static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, + struct ipv6hdr *ip6h, + struct tcphdr *th, + int iif, + struct open_request **prevp) +{ + struct open_request *req, *prev; + __u16 rport = th->source; + + /* assumption: the socket is not in use. + * as we checked the user count on tcp_rcv and we're + * running from a soft interrupt. + */ + prev = (struct open_request *) (&tp->syn_wait_queue); + for (req = prev->dl_next; req; req = req->dl_next) { + if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) && + !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) && + req->rmt_port == rport && + (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) { + *prevp = prev; + return req; + } + prev = req; + } + return NULL; +} + +static void tcp_v6_rst_req(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req, *prev; + + req = tcp_v6_search_req(tp,skb->nh.ipv6h,skb->h.th,tcp_v6_iif(skb),&prev); + if (!req) + return; + /* Sequence number check required by RFC793 */ + if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) || + after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) + return; + if(req->sk) + sk->ack_backlog--; + else + tp->syn_backlog--; + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); + net_statistics.EmbryonicRsts++; +} + +static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = skb->h.th; + u32 flg = ((u32 *)th)[3]; + + /* Check for RST */ + if (flg & __constant_htonl(0x00040000)) { + tcp_v6_rst_req(sk, skb); + return NULL; + } + + /* Check SYN|ACK */ + if (flg & __constant_htonl(0x00120000)) { + struct open_request *req, *dummy; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &dummy); + if (req) { + sk = tcp_check_req(sk, skb, req); + } +#if 0 /*def CONFIG_SYN_COOKIES */ + else { + sk = cookie_v6_check(sk, skb); + } +#endif + } + return sk; +} + +static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) +{ +#ifdef CONFIG_FILTER + struct sk_filter *filter; +#endif + int users = 0; + + /* Imagine: socket is IPv6. IPv4 packet arrives, + goes to IPv4 receive handler and backlogged. + From backlog it always goes here. Kerboom... + Fortunately, tcp_rcv_established and rcv_established + handle them correctly, but it is not case with + tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK + */ + + if (skb->protocol == __constant_htons(ETH_P_IP)) + return tcp_v4_do_rcv(sk, skb); + +#ifdef CONFIG_FILTER + filter = sk->filter; + if (filter && sk_filter(skb, filter)) + goto discard; +#endif /* CONFIG_FILTER */ + + /* + * socket locking is here for SMP purposes as backlog rcv + * is currently called with bh processing disabled. + */ + + ipv6_statistics.Ip6InDelivers++; + + /* + * This doesn't check if the socket has enough room for the packet. + * Either process the packet _without_ queueing it and then free it, + * or do the check later. + */ + skb_set_owner_r(skb, sk); + + /* Do Stevens' IPV6_PKTOPTIONS. + + Yes, guys, it is the only place in our code, where we + may make it not affecting IPv4. + The rest of code is protocol independent, + and I do not like idea to uglify IPv4. + + Actually, all the idea behind IPV6_PKTOPTIONS + looks not very well thought. 
For now we latch + options, received in the last packet, enqueued + by tcp. Feel free to propose better solution. + --ANK (980728) + */ + if (sk->net_pinfo.af_inet6.rxopt.all) { + users = atomic_read(&skb->users); + atomic_inc(&skb->users); + } + + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + if (users) + goto ipv6_pktoptions; + return 0; + } + + if (sk->state == TCP_LISTEN) { + struct sock *nsk; + + nsk = tcp_v6_hnd_req(sk, skb); + if (!nsk) + goto discard; + + /* + * Queue it on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket.. + */ + if (atomic_read(&nsk->sock_readers)) { + skb_orphan(skb); + __skb_queue_tail(&nsk->back_log, skb); + return 0; + } + sk = nsk; + } + + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + goto reset; + if (users) + goto ipv6_pktoptions; + return 0; + +reset: + tcp_v6_send_reset(skb); +discard: + if (users) + kfree_skb(skb); + kfree_skb(skb); + return 0; + +ipv6_pktoptions: + /* Do you ask, what is it? + + 1. skb was enqueued by tcp. + 2. skb is added to tail of read queue, rather than out of order. + 3. socket is not in passive state. + 4. Finally, it really contains options, which user wants to receive. + */ + if (atomic_read(&skb->users) > users && + TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt && + !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) { + if (ipv6_opt_accepted(sk, skb)) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + kfree_skb(skb); + skb = NULL; + if (skb2) { + skb_set_owner_r(skb2, sk); + skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, skb2); + } + } else { + kfree_skb(skb); + skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL); + } + } + + if (skb) + kfree_skb(skb); + return 0; +} + +int tcp_v6_rcv(struct sk_buff *skb, unsigned long len) +{ + struct tcphdr *th; + struct sock *sk; + struct in6_addr *saddr = &skb->nh.ipv6h->saddr; + struct in6_addr *daddr = &skb->nh.ipv6h->daddr; + + th = skb->h.th; + + if (skb->pkt_type != PACKET_HOST) + goto discard_it; + + /* + * Pull up the IP header. + */ + + __skb_pull(skb, skb->h.raw - skb->data); + + /* + * Count it even if it's bad. + */ + + tcp_statistics.TcpInSegs++; + + len = skb->len; + if (len < sizeof(struct tcphdr)) + goto bad_packet; + + /* + * Try to use the device checksum if provided. 
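+	 *	CHECKSUM_NONE deliberately falls through to CHECKSUM_HW:
+	 *	the software sum computed in the first case is verified
+	 *	by the same tcp_v6_check() pseudo-header test.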
+ */ + + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)th, len, 0); + case CHECKSUM_HW: + if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) { + printk(KERN_DEBUG "tcp csum failed\n"); + bad_packet: + tcp_statistics.TcpInErrs++; + goto discard_it; + } + default: + /* CHECKSUM_UNNECESSARY */ + }; + + if((th->doff * 4) < sizeof(struct tcphdr) || + len < (th->doff * 4)) + goto bad_packet; + + sk = __tcp_v6_lookup(th, saddr, th->source, daddr, th->dest, tcp_v6_iif(skb)); + + if (!sk) + goto no_tcp_socket; + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + skb->used = 0; + if(sk->state == TCP_TIME_WAIT) + goto do_time_wait; + + if (!atomic_read(&sk->sock_readers)) + return tcp_v6_do_rcv(sk, skb); + + __skb_queue_tail(&sk->back_log, skb); + return(0); + +no_tcp_socket: + tcp_v6_send_reset(skb); + +discard_it: + + /* + * Discard frame + */ + + kfree_skb(skb); + return 0; + +do_time_wait: + switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_ACK: + tcp_v6_send_ack(skb, + ((struct tcp_tw_bucket *)sk)->snd_nxt, + ((struct tcp_tw_bucket *)sk)->rcv_nxt, + ((struct tcp_tw_bucket *)sk)->window); + goto discard_it; + case TCP_TW_RST: + goto no_tcp_socket; + default: + goto discard_it; + } +} + +static int tcp_v6_rebuild_header(struct sock *sk) +{ + struct dst_entry *dst = NULL; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + if (sk->dst_cache) + dst = dst_check(&sk->dst_cache, np->dst_cookie); + + if (dst == NULL) { + struct flowi fl; + + fl.proto = IPPROTO_TCP; + fl.nl_u.ip6_u.daddr = &np->daddr; + fl.nl_u.ip6_u.saddr = &np->saddr; + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->bound_dev_if; + fl.uli_u.ports.dport = sk->dport; + fl.uli_u.ports.sport = sk->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + fl.nl_u.ip6_u.daddr = rt0->addr; + } + + + dst = ip6_route_output(sk, &fl); + + if (dst->error) { + dst_release(dst); + return dst->error; + } + + ip6_dst_store(sk, dst, NULL); + } + + return dst->error; +} + +static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th) +{ + struct in6_addr *saddr; + struct in6_addr *daddr; + + if (skb->protocol == __constant_htons(ETH_P_IP)) + return ipv4_specific.get_sock(skb, th); + + saddr = &skb->nh.ipv6h->saddr; + daddr = &skb->nh.ipv6h->daddr; + return tcp_v6_lookup(saddr, th->source, daddr, th->dest, tcp_v6_iif(skb)); +} + +static void tcp_v6_xmit(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6; + struct flowi fl; + struct dst_entry *dst = sk->dst_cache; + + fl.proto = IPPROTO_TCP; + fl.fl6_dst = &np->daddr; + fl.fl6_src = &np->saddr; + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->bound_dev_if; + fl.uli_u.ports.sport = sk->sport; + fl.uli_u.ports.dport = sk->dport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + fl.nl_u.ip6_u.daddr = rt0->addr; + } + + if (sk->dst_cache) + dst = dst_check(&sk->dst_cache, np->dst_cookie); + + if (dst == NULL) { + dst = ip6_route_output(sk, &fl); + + if (dst->error) { + sk->err_soft = -dst->error; + dst_release(dst); + return; + } + + ip6_dst_store(sk, dst, NULL); + } + + skb->dst = dst_clone(dst); + + /* Restore final destination back after routing done */ + fl.nl_u.ip6_u.daddr = &np->daddr; + + ip6_xmit(sk, skb, &fl, 
np->opt);
+}
+
+static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+	sin6->sin6_family = AF_INET6;
+	memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr));
+	sin6->sin6_port = sk->dport;
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+}
+
+static struct tcp_func ipv6_specific = {
+	tcp_v6_xmit,
+	tcp_v6_send_check,
+	tcp_v6_rebuild_header,
+	tcp_v6_conn_request,
+	tcp_v6_syn_recv_sock,
+	tcp_v6_get_sock,
+	sizeof(struct ipv6hdr),
+
+	ipv6_setsockopt,
+	ipv6_getsockopt,
+	v6_addr2sockaddr,
+	sizeof(struct sockaddr_in6)
+};
+
+/*
+ *	TCP over IPv4 via INET6 API
+ */
+
+static struct tcp_func ipv6_mapped = {
+	ip_queue_xmit,
+	tcp_v4_send_check,
+	tcp_v4_rebuild_header,
+	tcp_v6_conn_request,
+	tcp_v6_syn_recv_sock,
+	tcp_v6_get_sock,
+	sizeof(struct iphdr),
+
+	ipv6_setsockopt,
+	ipv6_getsockopt,
+	v6_addr2sockaddr,
+	sizeof(struct sockaddr_in6)
+};
+
+/* NOTE: A lot of things are set to zero explicitly by the call to
+ *       sk_alloc(), so they need not be done here.
+ */
+static int tcp_v6_init_sock(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	skb_queue_head_init(&tp->out_of_order_queue);
+	tcp_init_xmit_timers(sk);
+
+	tp->rto = TCP_TIMEOUT_INIT;		/*TCP_WRITE_TIME*/
+	tp->mdev = TCP_TIMEOUT_INIT;
+	tp->mss_clamp = ~0;
+
+	/* So many TCP implementations out there (incorrectly) count the
+	 * initial SYN frame in their delayed-ACK and congestion control
+	 * algorithms that we must have the following bandaid to talk
+	 * efficiently to them. -DaveM
+	 */
+	tp->snd_cwnd = 2;
+
+	/* See draft-stevens-tcpca-spec-01 for discussion of the
+	 * initialization of these values.
+	 */
+	tp->snd_cwnd_cnt = 0;
+	tp->snd_ssthresh = 0x7fffffff;
+
+	sk->state = TCP_CLOSE;
+	sk->max_ack_backlog = SOMAXCONN;
+	tp->rcv_mss = 536;
+
+	/* Init SYN queue. */
+	tcp_synq_init(tp);
+
+	sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
+
+	sk->write_space = tcp_write_space;
+
+	return 0;
+}
+
+static int tcp_v6_destroy_sock(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct sk_buff *skb;
+
+	tcp_clear_xmit_timers(sk);
+
+	if (sk->keepopen)
+		tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+
+	/*
+	 *	Clean up the write buffer.
+	 */
+
+	while((skb = __skb_dequeue(&sk->write_queue)) != NULL)
+		kfree_skb(skb);
+
+	/*
+	 *  Clean up our, hopefully empty, out_of_order_queue
+	 */
+
+	while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
+		kfree_skb(skb);
+
+	/* Clean up a locked TCP bind bucket, this only happens if a
+	 * port is allocated for a socket, but it never fully connects. 
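+	 * (tcp_v6_get_port() stashes the tcp_bind_bucket pointer in
+	 * sk->prev, which is what is tested below.)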
+ */ + if(sk->prev != NULL) + tcp_put_port(sk); + + return inet6_destroy_sock(sk); +} + +struct proto tcpv6_prot = { + (struct sock *)&tcpv6_prot, /* sklist_next */ + (struct sock *)&tcpv6_prot, /* sklist_prev */ + tcp_close, /* close */ + tcp_v6_connect, /* connect */ + tcp_accept, /* accept */ + NULL, /* retransmit */ + tcp_write_wakeup, /* write_wakeup */ + tcp_read_wakeup, /* read_wakeup */ + tcp_poll, /* poll */ + tcp_ioctl, /* ioctl */ + tcp_v6_init_sock, /* init */ + tcp_v6_destroy_sock, /* destroy */ + tcp_shutdown, /* shutdown */ + tcp_setsockopt, /* setsockopt */ + tcp_getsockopt, /* getsockopt */ + tcp_v6_sendmsg, /* sendmsg */ + tcp_recvmsg, /* recvmsg */ + NULL, /* bind */ + tcp_v6_do_rcv, /* backlog_rcv */ + tcp_v6_hash, /* hash */ + tcp_v6_unhash, /* unhash */ + tcp_v6_get_port, /* get_port */ + 128, /* max_header */ + 0, /* retransmits */ + "TCPv6", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ +}; + +static struct inet6_protocol tcpv6_protocol = +{ + tcp_v6_rcv, /* TCP handler */ + tcp_v6_err, /* TCP error control */ + NULL, /* next */ + IPPROTO_TCP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "TCPv6" /* name */ +}; + +__initfunc(void tcpv6_init(void)) +{ + /* register inet6 protocol */ + inet6_add_protocol(&tcpv6_protocol); +} diff --git a/pfinet/linux-src/net/ipv6/udp_ipv6.c b/pfinet/linux-src/net/ipv6/udp_ipv6.c new file mode 100644 index 00000000..377f5751 --- /dev/null +++ b/pfinet/linux-src/net/ipv6/udp_ipv6.c @@ -0,0 +1,926 @@ +/* + * UDP over IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * Based on linux/ipv4/udp.c + * + * $Id: udp_ipv6.c,v 1.1 2007/10/08 21:12:31 stesie Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/sched.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/ipv6.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <asm/uaccess.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/ip.h> +#include <net/udp.h> + +#include <net/checksum.h> + +struct udp_mib udp_stats_in6; + +/* Grrr, addr_type already calculated by caller, but I don't want + * to add some silly "cookie" argument to this method just for that. 
+ */ +static int udp_v6_get_port(struct sock *sk, unsigned short snum) +{ + SOCKHASH_LOCK(); + if (snum == 0) { + int best_size_so_far, best, result, i; + + if (udp_port_rover > sysctl_local_port_range[1] || + udp_port_rover < sysctl_local_port_range[0]) + udp_port_rover = sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = udp_port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct sock *sk; + int size; + + sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; + } + size = 0; + do { + if (++size >= best_size_so_far) + goto next; + } while ((sk = sk->next) != NULL); + best_size_so_far = size; + best = result; + next: + } + result = best; + for(;; result += UDP_HTABLE_SIZE) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } +gotit: + udp_port_rover = snum = result; + } else { + struct sock *sk2; + int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr); + + for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk2 != NULL; + sk2 = sk2->next) { + if (sk2->num == snum && + sk2 != sk && + sk2->bound_dev_if == sk->bound_dev_if && + (!sk2->rcv_saddr || + addr_type == IPV6_ADDR_ANY || + !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, + &sk2->net_pinfo.af_inet6.rcv_saddr)) && + (!sk2->reuse || !sk->reuse)) + goto fail; + } + } + + sk->num = snum; + SOCKHASH_UNLOCK(); + return 0; + +fail: + SOCKHASH_UNLOCK(); + return 1; +} + +static void udp_v6_hash(struct sock *sk) +{ + struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; + + SOCKHASH_LOCK(); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + +static void udp_v6_unhash(struct sock *sk) +{ + SOCKHASH_LOCK(); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + } + SOCKHASH_UNLOCK(); +} + +static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, + struct in6_addr *daddr, u16 dport, int dif) +{ + struct sock *sk, *result = NULL; + unsigned short hnum = ntohs(dport); + int badness = -1; + + for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { + if((sk->num == hnum) && + (sk->family == PF_INET6) && + !(sk->dead && (sk->state == TCP_CLOSE))) { + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int score = 0; + if(sk->dport) { + if(sk->dport != sport) + continue; + score++; + } + if(!ipv6_addr_any(&np->rcv_saddr)) { + if(ipv6_addr_cmp(&np->rcv_saddr, daddr)) + continue; + score++; + } + if(!ipv6_addr_any(&np->daddr)) { + if(ipv6_addr_cmp(&np->daddr, saddr)) + continue; + score++; + } + if(sk->bound_dev_if) { + if(sk->bound_dev_if != dif) + continue; + score++; + } + if(score == 4) { + result = sk; + break; + } else if(score > badness) { + result = sk; + badness = score; + } + } + } + return result; +} + +/* + * + */ + +int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct in6_addr *daddr; + struct in6_addr saddr; + struct dst_entry *dst; + struct flowi fl; + struct ip6_flowlabel *flowlabel = NULL; + int addr_type; + int err; + + if (usin->sin6_family == AF_INET) { + err = 
udp_connect(sk, uaddr, addr_len);
+		goto ipv4_connected;
+	}
+
+	if (addr_len < sizeof(*usin))
+		return(-EINVAL);
+
+	if (usin->sin6_family && usin->sin6_family != AF_INET6)
+		return(-EAFNOSUPPORT);
+
+	fl.fl6_flowlabel = 0;
+	if (np->sndflow) {
+		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+		}
+	}
+
+	addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+	if (addr_type == IPV6_ADDR_ANY) {
+		/*
+		 *	connect to self
+		 */
+		usin->sin6_addr.s6_addr[15] = 0x01;
+	}
+
+	daddr = &usin->sin6_addr;
+
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		struct sockaddr_in sin;
+
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = daddr->s6_addr32[3];
+		sin.sin_port = usin->sin6_port;
+
+		err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin));
+
+ipv4_connected:
+		if (err < 0)
+			return err;
+
+		ipv6_addr_set(&np->daddr, 0, 0,
+			      __constant_htonl(0x0000ffff),
+			      sk->daddr);
+
+		if(ipv6_addr_any(&np->saddr)) {
+			ipv6_addr_set(&np->saddr, 0, 0,
+				      __constant_htonl(0x0000ffff),
+				      sk->saddr);
+
+		}
+
+		if(ipv6_addr_any(&np->rcv_saddr)) {
+			ipv6_addr_set(&np->rcv_saddr, 0, 0,
+				      __constant_htonl(0x0000ffff),
+				      sk->rcv_saddr);
+		}
+		return 0;
+	}
+
+	ipv6_addr_copy(&np->daddr, daddr);
+	np->flow_label = fl.fl6_flowlabel;
+
+	sk->dport = usin->sin6_port;
+
+	/*
+	 *	Check for a route to the destination and obtain
+	 *	the destination cache for it.
+	 */
+
+	fl.proto = IPPROTO_UDP;
+	fl.fl6_dst = &np->daddr;
+	fl.fl6_src = &saddr;
+	fl.oif = sk->bound_dev_if;
+	fl.uli_u.ports.dport = sk->dport;
+	fl.uli_u.ports.sport = sk->sport;
+
+	if (flowlabel) {
+		if (flowlabel->opt && flowlabel->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
+			fl.fl6_dst = rt0->addr;
+		}
+	} else if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+		fl.fl6_dst = rt0->addr;
+	}
+
+	dst = ip6_route_output(sk, &fl);
+
+	if ((err = dst->error) != 0) {
+		dst_release(dst);
+		fl6_sock_release(flowlabel);
+		return err;
+	}
+
+	ip6_dst_store(sk, dst, fl.fl6_dst);
+
+	/* get the source address used on the appropriate device */
+
+	err = ipv6_get_saddr(dst, daddr, &saddr);
+
+	if (err == 0) {
+		if(ipv6_addr_any(&np->saddr))
+			ipv6_addr_copy(&np->saddr, &saddr);
+
+		if(ipv6_addr_any(&np->rcv_saddr)) {
+			ipv6_addr_copy(&np->rcv_saddr, &saddr);
+			sk->rcv_saddr = 0xffffffff;
+		}
+		sk->state = TCP_ESTABLISHED;
+	}
+	fl6_sock_release(flowlabel);
+
+	return err;
+}
+
+static void udpv6_close(struct sock *sk, long timeout)
+{
+	/* For an explanation, see raw_close in ipv4/raw.c */
+	sk->state = TCP_CLOSE;
+	udp_v6_unhash(sk);
+	sk->dead = 1;
+	destroy_sock(sk);
+}
+
+#ifndef HAVE_CSUM_COPY_USER
+#undef CONFIG_UDP_DELAY_CSUM
+#endif
+
+/*
+ *	This should be easy, if there is something there we
+ *	return it, otherwise we block. 
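+ *	The blocking itself is done by skb_recv_datagram() below;
+ *	MSG_ERRQUEUE requests are diverted to ipv6_recv_error() first.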
+ */ + +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags, int *addr_len) +{ + struct sk_buff *skb; + int copied, err; + + if (addr_len) + *addr_len=sizeof(struct sockaddr_in6); + + if (flags & MSG_ERRQUEUE) + return ipv6_recv_error(sk, msg, len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len - sizeof(struct udphdr); + if (copied > len) { + copied = len; + msg->msg_flags |= MSG_TRUNC; + } + +#ifndef CONFIG_UDP_DELAY_CSUM + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied); +#else + if (skb->ip_summed==CHECKSUM_UNNECESSARY) { + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, + copied); + } else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) { + if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) { + /* Error for blocking case is chosen to masquerade + as some normal condition. + */ + err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; + udp_stats_in6.UdpInErrors++; + goto out_free; + } + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, + copied); + } else { + unsigned int csum = csum_partial(skb->h.raw, sizeof(struct udphdr), skb->csum); + + err = 0; + csum = csum_and_copy_to_user((char*)&skb->h.uh[1], msg->msg_iov[0].iov_base, copied, csum, &err); + if (err) + goto out_free; + if ((unsigned short)csum_fold(csum)) { + /* Error for blocking case is chosen to masquerade + as some normal condition. + */ + err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; + udp_stats_in6.UdpInErrors++; + goto out_free; + } + } +#endif + if (err) + goto out_free; + + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (msg->msg_name) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = skb->h.uh->source; + sin6->sin6_flowinfo = 0; + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + __constant_htonl(0xffff), skb->nh.iph->saddr); + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); + } else { + memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr, + sizeof(struct in6_addr)); + + if (sk->net_pinfo.af_inet6.rxopt.all) + datagram_recv_ctl(sk, msg, skb); + } + } + err = copied; + +out_free: + skb_free_datagram(sk, skb); +out: + return err; +} + +void udpv6_err(struct sk_buff *skb, struct ipv6hdr *hdr, + struct inet6_skb_parm *opt, + int type, int code, unsigned char *buff, __u32 info) +{ + struct device *dev = skb->dev; + struct in6_addr *saddr = &hdr->saddr; + struct in6_addr *daddr = &hdr->daddr; + struct sock *sk; + struct udphdr *uh; + int err; + + if (buff + sizeof(struct udphdr) > skb->tail) + return; + + uh = (struct udphdr *) buff; + + sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex); + + if (sk == NULL) + return; + + if (!icmpv6_err_convert(type, code, &err) && + !sk->net_pinfo.af_inet6.recverr) + return; + + if (sk->bsdism && sk->state!=TCP_ESTABLISHED) + return; + + if (sk->net_pinfo.af_inet6.recverr) + ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); + + sk->err = err; + sk->error_report(sk); +} + +static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ +#if defined(CONFIG_FILTER) && defined(CONFIG_UDP_DELAY_CSUM) + if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { + if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) { + udp_stats_in6.UdpInErrors++; + 
ipv6_statistics.Ip6InDiscards++; + kfree_skb(skb); + return 0; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +#endif + if (sock_queue_rcv_skb(sk,skb)<0) { + udp_stats_in6.UdpInErrors++; + ipv6_statistics.Ip6InDiscards++; + kfree_skb(skb); + return 0; + } + ipv6_statistics.Ip6InDelivers++; + udp_stats_in6.UdpInDatagrams++; + return 0; +} + +static __inline__ int inet6_mc_check(struct sock *sk, struct in6_addr *addr) +{ + struct ipv6_mc_socklist *mc; + + for (mc = sk->net_pinfo.af_inet6.ipv6_mc_list; mc; mc=mc->next) { + if (ipv6_addr_cmp(&mc->addr, addr) == 0) + return 1; + } + + return 0; +} + +static struct sock *udp_v6_mcast_next(struct sock *sk, + u16 loc_port, struct in6_addr *loc_addr, + u16 rmt_port, struct in6_addr *rmt_addr, + int dif) +{ + struct sock *s = sk; + unsigned short num = ntohs(loc_port); + for(; s; s = s->next) { + if((s->num == num) && + !(s->dead && (s->state == TCP_CLOSE))) { + struct ipv6_pinfo *np = &s->net_pinfo.af_inet6; + if(s->dport) { + if(s->dport != rmt_port) + continue; + } + if(!ipv6_addr_any(&np->daddr) && + ipv6_addr_cmp(&np->daddr, rmt_addr)) + continue; + + if (s->bound_dev_if && s->bound_dev_if != dif) + continue; + + if(!ipv6_addr_any(&np->rcv_saddr)) { + if(ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0) + return s; + } + if(!inet6_mc_check(s, loc_addr)) + continue; + return s; + } + } + return NULL; +} + +/* + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static void udpv6_mcast_deliver(struct udphdr *uh, + struct in6_addr *saddr, struct in6_addr *daddr, + struct sk_buff *skb) +{ + struct sock *sk, *sk2; + struct sk_buff *buff; + int dif; + + sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; + dif = skb->dev->ifindex; + sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (!sk) + goto free_skb; + + buff = NULL; + sk2 = sk; + while((sk2 = udp_v6_mcast_next(sk2->next, uh->dest, saddr, + uh->source, daddr, dif))) { + if (!buff) { + buff = skb_clone(skb, GFP_ATOMIC); + if (!buff) + continue; + } + if (sock_queue_rcv_skb(sk2, buff) >= 0) + buff = NULL; + } + if (buff) + kfree_skb(buff); + if (sock_queue_rcv_skb(sk, skb) < 0) { +free_skb: + kfree_skb(skb); + } +} + +int udpv6_rcv(struct sk_buff *skb, unsigned long len) +{ + struct sock *sk; + struct udphdr *uh; + struct device *dev = skb->dev; + struct in6_addr *saddr = &skb->nh.ipv6h->saddr; + struct in6_addr *daddr = &skb->nh.ipv6h->daddr; + u32 ulen; + + uh = skb->h.uh; + __skb_pull(skb, skb->h.raw - skb->data); + + ulen = ntohs(uh->len); + + /* Check for jumbo payload */ + if (ulen == 0 && skb->nh.ipv6h->payload_len == 0) + ulen = len; + + if (ulen > len || len < sizeof(*uh)) { + if (net_ratelimit()) + printk(KERN_DEBUG "UDP: short packet: %d/%ld\n", ulen, len); + udp_stats_in6.UdpInErrors++; + kfree_skb(skb); + return(0); + } + + if (uh->check == 0) { + /* IPv6 draft-v2 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. 
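+		   Unlike UDP over IPv4, a zero checksum does not mean
+		   "no checksum" here: the checksum is mandatory for UDP
+		   over IPv6, so such datagrams must be dropped.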
+		 */
+		if (net_ratelimit())
+			printk(KERN_INFO "IPv6: udp checksum is 0\n");
+		goto discard;
+	}
+
+	skb_trim(skb, ulen);
+
+#ifndef CONFIG_UDP_DELAY_CSUM
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		skb->csum = csum_partial((char *)uh, ulen, 0);
+		/* fall through: verify the checksum just computed */
+	case CHECKSUM_HW:
+		if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
+			printk(KERN_DEBUG "IPv6: udp checksum error\n");
+			goto discard;
+		}
+	}
+#else
+	if (skb->ip_summed == CHECKSUM_HW) {
+		if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
+			goto discard;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+		/* Delay full verification: keep the complemented
+		   pseudo-header sum in skb->csum so the final check can
+		   be folded into the later copy to user space. */
+		skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
+#endif
+
+	len = ulen;
+
+	/*
+	 *	Multicast receive code
+	 */
+	if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
+		udpv6_mcast_deliver(uh, saddr, daddr, skb);
+		return 0;
+	}
+
+	/* Unicast */
+
+	/*
+	 * check socket cache ... must talk to Alan about his plans
+	 * for sock caches... I'll skip this for now.
+	 */
+
+	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+
+	if (sk == NULL) {
+#ifdef CONFIG_UDP_DELAY_CSUM
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		    (unsigned short)csum_fold(csum_partial((char *)uh, len, skb->csum)))
+			goto discard;
+#endif
+		udp_stats_in6.UdpNoPorts++;
+
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
+
+		kfree_skb(skb);
+		return(0);
+	}
+
+	/* deliver */
+
+	udpv6_queue_rcv_skb(sk, skb);
+
+	return(0);
+
+discard:
+	udp_stats_in6.UdpInErrors++;
+	kfree_skb(skb);
+	return(0);
+}
+
+/*
+ *	Sending
+ */
+
+struct udpv6fakehdr
+{
+	struct udphdr uh;
+	struct iovec *iov;
+	__u32 wcheck;
+	__u32 pl_len;
+	struct in6_addr *daddr;
+};
+
+/*
+ *	with checksum
+ */
+
+static int udpv6_getfrag(const void *data, struct in6_addr *addr,
+			 char *buff, unsigned int offset, unsigned int len)
+{
+	struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data;
+	char *dst;
+	int final = 0;
+	int clen = len;
+
+	dst = buff;
+
+	if (offset) {
+		offset -= sizeof(struct udphdr);
+	} else {
+		dst += sizeof(struct udphdr);
+		final = 1;
+		clen -= sizeof(struct udphdr);
+	}
+
+	if (csum_partial_copy_fromiovecend(dst, udh->iov, offset,
+					   clen, &udh->wcheck))
+		return -EFAULT;
+
+	if (final) {
+		struct in6_addr *daddr;
+
+		udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr),
+					   udh->wcheck);
+
+		if (udh->daddr) {
+			daddr = udh->daddr;
+		} else {
+			/*
+			 * Use the packet destination address;
+			 * this should improve cache locality.
+			 */
+			daddr = addr + 1;
+		}
+		udh->uh.check = csum_ipv6_magic(addr, daddr,
+						udh->pl_len, IPPROTO_UDP,
+						udh->wcheck);
+		if (udh->uh.check == 0)
+			udh->uh.check = -1;
+
+		memcpy(buff, udh, sizeof(struct udphdr));
+	}
+	return 0;
+}
+
+static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
+{
+	struct ipv6_txoptions opt_space;
+	struct udpv6fakehdr udh;
+	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+	struct ipv6_txoptions *opt = NULL;
+	struct ip6_flowlabel *flowlabel = NULL;
+	struct flowi fl;
+	int addr_len = msg->msg_namelen;
+	struct in6_addr *daddr;
+	int len = ulen + sizeof(struct udphdr);
+	int addr_type;
+	int hlimit = -1;
+
+	int err;
+
+	/* Rough check on arithmetic overflow;
+	   a better check is made in ip6_build_xmit.
+	 */
+	if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
+		return -EMSGSIZE;
+
+	if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
+		return(-EINVAL);
+
+	fl.fl6_flowlabel = 0;
+
+	if (sin6) {
+		if (sin6->sin6_family == AF_INET)
+			return udp_sendmsg(sk, msg, ulen);
+
+		if (addr_len < sizeof(*sin6))
+			return(-EINVAL);
+
+		if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
+			return(-EINVAL);
+
+		if (sin6->sin6_port == 0)
+			return(-EINVAL);
+
+		udh.uh.dest = sin6->sin6_port;
+		daddr = &sin6->sin6_addr;
+
+		if (np->sndflow) {
+			fl.fl6_flowlabel = sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+			if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+				if (flowlabel == NULL)
+					return -EINVAL;
+				daddr = &flowlabel->dst;
+			}
+		}
+
+		/* Otherwise it will be difficult to maintain sk->dst_cache. */
+		if (sk->state == TCP_ESTABLISHED &&
+		    !ipv6_addr_cmp(daddr, &sk->net_pinfo.af_inet6.daddr))
+			daddr = &sk->net_pinfo.af_inet6.daddr;
+	} else {
+		if (sk->state != TCP_ESTABLISHED)
+			return(-ENOTCONN);
+
+		udh.uh.dest = sk->dport;
+		daddr = &sk->net_pinfo.af_inet6.daddr;
+		fl.fl6_flowlabel = np->flow_label;
+	}
+
+	addr_type = ipv6_addr_type(daddr);
+
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		struct sockaddr_in sin;
+
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = daddr->s6_addr32[3];
+		sin.sin_port = udh.uh.dest;
+		msg->msg_name = (struct sockaddr *)(&sin);
+		msg->msg_namelen = sizeof(sin);
+		fl6_sock_release(flowlabel);
+
+		return udp_sendmsg(sk, msg, ulen);
+	}
+
+	udh.daddr = NULL;
+	fl.oif = sk->bound_dev_if;
+	fl.fl6_src = NULL;
+
+	if (msg->msg_controllen) {
+		opt = &opt_space;
+		memset(opt, 0, sizeof(struct ipv6_txoptions));
+
+		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
+		if (err < 0) {
+			fl6_sock_release(flowlabel);
+			return err;
+		}
+		if ((fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+		}
+		if (!(opt->opt_nflen|opt->opt_flen))
+			opt = NULL;
+	}
+	if (opt == NULL)
+		opt = np->opt;
+	if (flowlabel)
+		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+	if (opt && opt->srcrt)
+		udh.daddr = daddr;
+
+	udh.uh.source = sk->sport;
+	udh.uh.len = len < 0x10000 ? htons(len) : 0;
+	udh.uh.check = 0;
+	udh.iov = msg->msg_iov;
+	udh.wcheck = 0;
+	udh.pl_len = len;
+
+	fl.proto = IPPROTO_UDP;
+	fl.fl6_dst = daddr;
+	fl.uli_u.ports.dport = udh.uh.dest;
+	fl.uli_u.ports.sport = udh.uh.source;
+
+	err = ip6_build_xmit(sk, udpv6_getfrag, &udh, &fl, len, opt, hlimit,
+			     msg->msg_flags);
+
+	fl6_sock_release(flowlabel);
+
+	if (err < 0)
+		return err;
+
+	udp_stats_in6.UdpOutDatagrams++;
+	return ulen;
+}
+
+static struct inet6_protocol udpv6_protocol =
+{
+	udpv6_rcv,		/* UDP handler */
+	udpv6_err,		/* UDP error control */
+	NULL,			/* next */
+	IPPROTO_UDP,		/* protocol ID */
+	0,			/* copy */
+	NULL,			/* data */
+	"UDPv6"			/* name */
+};
+
+struct proto udpv6_prot = {
+	(struct sock *)&udpv6_prot,	/* sklist_next */
+	(struct sock *)&udpv6_prot,	/* sklist_prev */
+	udpv6_close,			/* close */
+	udpv6_connect,			/* connect */
+	NULL,				/* accept */
+	NULL,				/* retransmit */
+	NULL,				/* write_wakeup */
+	NULL,				/* read_wakeup */
+	datagram_poll,			/* poll */
+	udp_ioctl,			/* ioctl */
+	NULL,				/* init */
+	inet6_destroy_sock,		/* destroy */
+	NULL,				/* shutdown */
+	ipv6_setsockopt,		/* setsockopt */
+	ipv6_getsockopt,		/* getsockopt */
+	udpv6_sendmsg,			/* sendmsg */
+	udpv6_recvmsg,			/* recvmsg */
+	NULL,				/* bind */
+	udpv6_queue_rcv_skb,		/* backlog_rcv */
+	udp_v6_hash,			/* hash */
+	udp_v6_unhash,			/* unhash */
+	udp_v6_get_port,		/* get_port */
+	128,				/* max_header */
+	0,				/* retransmits */
+	"UDP",				/* name */
+	0,				/* inuse */
+	0				/* highestinuse */
+};
+
+void __init udpv6_init(void)
+{
+	inet6_add_protocol(&udpv6_protocol);
+}
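
For context, the functions above sit directly beneath the BSD socket calls, so a plain AF_INET6 datagram client is enough to drive this whole file: sendto() enters udpv6_sendmsg(), and the reply comes back up through udpv6_rcv() and udpv6_recvmsg(). The following minimal user-space sketch is not part of the patch; it assumes a standard sockets API, and the echo service on ::1 port 7 is only an illustrative peer.

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	int main(void)
	{
		int fd = socket(AF_INET6, SOCK_DGRAM, 0);
		struct sockaddr_in6 dst, src;
		socklen_t srclen = sizeof(src);
		char buf[16];

		memset(&dst, 0, sizeof(dst));
		dst.sin6_family = AF_INET6;
		dst.sin6_port = htons(7);	/* illustrative echo port */
		inet_pton(AF_INET6, "::1", &dst.sin6_addr);

		/* Enters udpv6_sendmsg(): builds the UDP header and
		   checksum via udpv6_getfrag(), then ip6_build_xmit(). */
		sendto(fd, "ping", 4, 0, (struct sockaddr *)&dst, sizeof(dst));

		/* Enters udpv6_recvmsg(): copies the datagram, verifying
		   any deferred checksum, and fills in src.  Blocks until
		   the peer answers; error handling is omitted. */
		recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&src, &srclen);

		close(fd);
		return 0;
	}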