author    | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2013-07-27 22:07:53 +0000
committer | Samuel Thibault <samuel.thibault@ens-lyon.org> | 2013-07-27 22:07:53 +0000
commit    | 4fbe7358c7747a9165f776eb19addbb9baf7def2 (patch)
tree      | bc7076b4f6d10c2cc2942539bb666e50f0b66954 /libdde_linux26/lib/src/net
parent    | 21adb5284111190057db245cfc2b54091920c373 (diff)
rename libdde_linux26 into libdde-linux26 to make dpkg-source happy
Diffstat (limited to 'libdde_linux26/lib/src/net')
-rw-r--r-- | libdde_linux26/lib/src/net/core/dev.c           | 5286
-rw-r--r-- | libdde_linux26/lib/src/net/core/link_watch.c    |  238
-rw-r--r-- | libdde_linux26/lib/src/net/core/net_namespace.c |  511
-rw-r--r-- | libdde_linux26/lib/src/net/core/rtnetlink.c     | 1436
-rw-r--r-- | libdde_linux26/lib/src/net/core/skbuff.c        | 2956
-rw-r--r-- | libdde_linux26/lib/src/net/core/utils.c         |  309
-rw-r--r-- | libdde_linux26/lib/src/net/netlink/af_netlink.c | 2013
-rw-r--r-- | libdde_linux26/lib/src/net/sched/sch_generic.c  |  749
8 files changed, 0 insertions, 13498 deletions
diff --git a/libdde_linux26/lib/src/net/core/dev.c b/libdde_linux26/lib/src/net/core/dev.c
deleted file mode 100644
index cf036525..00000000
--- a/libdde_linux26/lib/src/net/core/dev.c
+++ /dev/null
@@ -1,5286 +0,0 @@
-/*
- *      NET3    Protocol independent device support routines.
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- *      Derived from the non IP parts of dev.c 1.0.19
- *              Authors:        Ross Biro
- *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
- *
- *      Additional Authors:
- *              Florian la Roche <rzsfl@rz.uni-sb.de>
- *              Alan Cox <gw4pts@gw4pts.ampr.org>
- *              David Hinds <dahinds@users.sourceforge.net>
- *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
- *              Adam Sulmicki <adam@cfar.umd.edu>
- *              Pekka Riikonen <priikone@poesidon.pspt.fi>
- *
- *      Changes:
- *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
- *                                      to 2 if register_netdev gets called
- *                                      before net_dev_init & also removed a
- *                                      few lines of code in the process.
- *              Alan Cox        :       device private ioctl copies fields back.
- *              Alan Cox        :       Transmit queue code does relevant
- *                                      stunts to keep the queue safe.
- *              Alan Cox        :       Fixed double lock.
- *              Alan Cox        :       Fixed promisc NULL pointer trap
- *              ????????        :       Support the full private ioctl range
- *              Alan Cox        :       Moved ioctl permission check into
- *                                      drivers
- *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
- *              Alan Cox        :       100 backlog just doesn't cut it when
- *                                      you start doing multicast video 8)
- *              Alan Cox        :       Rewrote net_bh and list manager.
- *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
- *              Alan Cox        :       Took out transmit every packet pass
- *                                      Saved a few bytes in the ioctl handler
- *              Alan Cox        :       Network driver sets packet type before
- *                                      calling netif_rx. Saves a function
- *                                      call a packet.
- *              Alan Cox        :       Hashed net_bh()
- *              Richard Kooijman:       Timestamp fixes.
- *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
- *              Alan Cox        :       Device lock protection.
- *              Alan Cox        :       Fixed nasty side effect of device close
- *                                      changes.
- *              Rudi Cilibrasi  :       Pass the right thing to
- *                                      set_mac_address()
- *              Dave Miller     :       32bit quantity for the device lock to
- *                                      make it work out on a Sparc.
- *              Bjorn Ekwall    :       Added KERNELD hack.
- *              Alan Cox        :       Cleaned up the backlog initialise.
- *              Craig Metz      :       SIOCGIFCONF fix if space for under
- *                                      1 device.
- *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
- *                                      is no device open function.
- *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
- *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
- *              Cyrus Durgin    :       Cleaned for KMOD
- *              Adam Sulmicki   :       Bug Fix : Network Device Unload
- *                                      A network device unload needs to purge
- *                                      the backlog queue.
- * Paul Rusty Russell : SIOCSIFNAME - * Pekka Riikonen : Netdev boot-time settings code - * Andrew Morton : Make unregister_netdevice wait - * indefinitely on dev->refcnt - * J Hadi Salim : - Backlog queue sampling - * - netif_rx() feedback - */ - -#ifdef DDE_LINUX -#include "local.h" -#include <dde26_net.h> -#endif - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> -#include <linux/capability.h> -#include <linux/cpu.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mutex.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/notifier.h> -#include <linux/skbuff.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/stat.h> -#include <linux/if_bridge.h> -#include <linux/if_macvlan.h> -#include <net/dst.h> -#include <net/pkt_sched.h> -#include <net/checksum.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/kmod.h> -#include <linux/module.h> -#include <linux/netpoll.h> -#include <linux/rcupdate.h> -#include <linux/delay.h> -#include <net/wext.h> -#include <net/iw_handler.h> -#include <asm/current.h> -#include <linux/audit.h> -#include <linux/dmaengine.h> -#include <linux/err.h> -#include <linux/ctype.h> -#include <linux/if_arp.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <net/ip.h> -#include <linux/ipv6.h> -#include <linux/in.h> -#include <linux/jhash.h> -#include <linux/random.h> - -#include "net-sysfs.h" - -#include <ddekit/timer.h> - -/* Instead of increasing this, you should create a hash table. */ -#define MAX_GRO_SKBS 8 - -/* This should be increased if a protocol with a bigger head is added. */ -#define GRO_MAX_HEAD (MAX_HEADER + 128) - -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - -static DEFINE_SPINLOCK(ptype_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ - -/* - * The @dev_base_head list is protected by @dev_base_lock and the rtnl - * semaphore. - * - * Pure readers hold dev_base_lock for reading. - * - * Writers must hold the rtnl semaphore while they loop through the - * dev_base_head list, and hold dev_base_lock for writing when they do the - * actual updates. This allows pure readers to access the list even - * while a writer is preparing to update it. 
- * - * To put it another way, dev_base_lock is held for writing only to - * protect against pure readers; the rtnl semaphore provides the - * protection against other writers. - * - * See, for example usages, register_netdevice() and - * unregister_netdevice(), which must be called with the rtnl - * semaphore held. - */ -DEFINE_RWLOCK(dev_base_lock); - -EXPORT_SYMBOL(dev_base_lock); - -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - -static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) -{ - unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; -} - -static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) -{ - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; -} - -/* Device list insertion */ -static int list_netdevice(struct net_device *dev) -{ - struct net *net = dev_net(dev); - - ASSERT_RTNL(); - - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); - write_unlock_bh(&dev_base_lock); - return 0; -} - -/* Device list removal */ -static void unlist_netdevice(struct net_device *dev) -{ - ASSERT_RTNL(); - - /* Unlink dev from the device chain */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); -} - -/* - * Our notifier list - */ - -static RAW_NOTIFIER_HEAD(netdev_chain); - -/* - * Device drivers call our routines to queue packets here. We empty the - * queue in the local softnet handler. - */ - -DEFINE_PER_CPU(struct softnet_data, softnet_data); - -#ifdef CONFIG_LOCKDEP -/* - * register_netdevice() inits txq->_xmit_lock and sets lockdep class - * according to dev->type - */ -static const unsigned short netdev_lock_type[] = - {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, - ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, - ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, - ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, - ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, - ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, - ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, - ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, - ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, - ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, - ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, - ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; - -static const char *netdev_lock_name[] = - {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", 
"_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; - -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; - -static inline unsigned short netdev_lock_pos(unsigned short dev_type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) - if (netdev_lock_type[i] == dev_type) - return i; - /* the last key is used by default */ - return ARRAY_SIZE(netdev_lock_type) - 1; -} - -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ - int i; - - i = netdev_lock_pos(dev_type); - lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], - netdev_lock_name[i]); -} - -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ - int i; - - i = netdev_lock_pos(dev->type); - lockdep_set_class_and_name(&dev->addr_list_lock, - &netdev_addr_lock_key[i], - netdev_lock_name[i]); -} -#else -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ -} -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ -} -#endif - -/******************************************************************************* - - Protocol management and registration routines - -*******************************************************************************/ - -/* - * Add a protocol ID to the list. Now that the input handler is - * smarter we can dispense with all the messy stuff that used to be - * here. - * - * BEWARE!!! Protocol handlers, mangling input packets, - * MUST BE last in hash buckets and checking protocol handlers - * MUST start from promiscuous ptype_all chain in net_bh. - * It is true now, do not change it. - * Explanation follows: if protocol handler, mangling packet, will - * be the first on list, it is not able to sense, that packet - * is cloned and should be copied-on-write, so that it will - * change it and subsequent readers will get broken packet. - * --ANK (980803) - */ - -/** - * dev_add_pack - add packet handler - * @pt: packet type declaration - * - * Add a protocol handler to the networking stack. The passed &packet_type - * is linked into kernel lists and may not be freed until it has been - * removed from the kernel lists. - * - * This call does not sleep therefore it can not - * guarantee all CPU's that are in middle of receiving packets - * will see the new packet type (until the next received packet). - */ - -void dev_add_pack(struct packet_type *pt) -{ - int hash; - - spin_lock_bh(&ptype_lock); - if (pt->type == htons(ETH_P_ALL)) - list_add_rcu(&pt->list, &ptype_all); - else { - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - list_add_rcu(&pt->list, &ptype_base[hash]); - } - spin_unlock_bh(&ptype_lock); -} - -/** - * __dev_remove_pack - remove packet handler - * @pt: packet type declaration - * - * Remove a protocol handler that was previously added to the kernel - * protocol handlers by dev_add_pack(). The passed &packet_type is removed - * from the kernel lists and can be freed or reused once this function - * returns. - * - * The packet type might still be in use by receivers - * and must not be freed until after all the CPU's have gone - * through a quiescent state. 
- */ -void __dev_remove_pack(struct packet_type *pt) -{ - struct list_head *head; - struct packet_type *pt1; - - spin_lock_bh(&ptype_lock); - - if (pt->type == htons(ETH_P_ALL)) - head = &ptype_all; - else - head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; - - list_for_each_entry(pt1, head, list) { - if (pt == pt1) { - list_del_rcu(&pt->list); - goto out; - } - } - - printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); -out: - spin_unlock_bh(&ptype_lock); -} -/** - * dev_remove_pack - remove packet handler - * @pt: packet type declaration - * - * Remove a protocol handler that was previously added to the kernel - * protocol handlers by dev_add_pack(). The passed &packet_type is removed - * from the kernel lists and can be freed or reused once this function - * returns. - * - * This call sleeps to guarantee that no CPU is looking at the packet - * type after return. - */ -void dev_remove_pack(struct packet_type *pt) -{ - __dev_remove_pack(pt); - - synchronize_net(); -} - -/****************************************************************************** - - Device Boot-time Settings Routines - -*******************************************************************************/ - -/* Boot time configuration table */ -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; - -/** - * netdev_boot_setup_add - add new setup entry - * @name: name of the device - * @map: configured settings for the device - * - * Adds new setup entry to the dev_boot_setup list. The function - * returns 0 on error and 1 on success. This is a generic routine to - * all netdevices. - */ -static int netdev_boot_setup_add(char *name, struct ifmap *map) -{ - struct netdev_boot_setup *s; - int i; - - s = dev_boot_setup; - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strlcpy(s[i].name, name, IFNAMSIZ); - memcpy(&s[i].map, map, sizeof(s[i].map)); - break; - } - } - - return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -} - -/** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice - * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. - */ -int netdev_boot_setup_check(struct net_device *dev) -{ - struct netdev_boot_setup *s = dev_boot_setup; - int i; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; - return 1; - } - } - return 0; -} - - -/** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device - * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. 
- */ -unsigned long netdev_boot_base(const char *prefix, int unit) -{ - const struct netdev_boot_setup *s = dev_boot_setup; - char name[IFNAMSIZ]; - int i; - - sprintf(name, "%s%d", prefix, unit); - - /* - * If device already registered then return base of 1 - * to indicate not to probe for this interface - */ - if (__dev_get_by_name(&init_net, name)) - return 1; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) - if (!strcmp(name, s[i].name)) - return s[i].map.base_addr; - return 0; -} - -#ifndef DDE_LINUX -/* - * Saves at boot time configured settings for any netdevice. - */ -int __init netdev_boot_setup(char *str) -{ - int ints[5]; - struct ifmap map; - - str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) - return 0; - - /* Save settings */ - memset(&map, 0, sizeof(map)); - if (ints[0] > 0) - map.irq = ints[1]; - if (ints[0] > 1) - map.base_addr = ints[2]; - if (ints[0] > 2) - map.mem_start = ints[3]; - if (ints[0] > 3) - map.mem_end = ints[4]; - - /* Add new entry to the list */ - return netdev_boot_setup_add(str, &map); -} -#endif - -__setup("netdev=", netdev_boot_setup); - -/******************************************************************************* - - Device Interface Subroutines - -*******************************************************************************/ - -/** - * __dev_get_by_name - find a device by its name - * @net: the applicable net namespace - * @name: name to find - * - * Find an interface by name. Must be called under RTNL semaphore - * or @dev_base_lock. If the name is found a pointer to the device - * is returned. If the name is not found then %NULL is returned. The - * reference counters are not incremented so the caller must be - * careful with locks. - */ - -struct net_device *__dev_get_by_name(struct net *net, const char *name) -{ - struct hlist_node *p; - - hlist_for_each(p, dev_name_hash(net, name)) { - struct net_device *dev - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(dev->name, name, IFNAMSIZ)) - return dev; - } - return NULL; -} - -/** - * dev_get_by_name - find a device by its name - * @net: the applicable net namespace - * @name: name to find - * - * Find an interface by name. This can be called from any - * context and does its own locking. The returned handle has - * the usage count incremented and the caller must use dev_put() to - * release it when it is no longer needed. %NULL is returned if no - * matching device is found. - */ - -struct net_device *dev_get_by_name(struct net *net, const char *name) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; -} - -/** - * __dev_get_by_index - find a device by its ifindex - * @net: the applicable net namespace - * @ifindex: index of device - * - * Search for an interface by index. Returns %NULL if the device - * is not found or a pointer to the device. The device has not - * had its reference counter increased so the caller must be careful - * about locking. The caller must hold either the RTNL semaphore - * or @dev_base_lock. 
- */ - -struct net_device *__dev_get_by_index(struct net *net, int ifindex) -{ - struct hlist_node *p; - - hlist_for_each(p, dev_index_hash(net, ifindex)) { - struct net_device *dev - = hlist_entry(p, struct net_device, index_hlist); - if (dev->ifindex == ifindex) - return dev; - } - return NULL; -} - - -/** - * dev_get_by_index - find a device by its ifindex - * @net: the applicable net namespace - * @ifindex: index of device - * - * Search for an interface by index. Returns NULL if the device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. - */ - -struct net_device *dev_get_by_index(struct net *net, int ifindex) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; -} - -/** - * dev_getbyhwaddr - find a device by its hardware address - * @net: the applicable net namespace - * @type: media type of device - * @ha: hardware address - * - * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. The returned device has not had its ref count increased - * and the caller must therefore be careful about locking - * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr - */ - -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) -{ - struct net_device *dev; - - ASSERT_RTNL(); - - for_each_netdev(net, dev) - if (dev->type == type && - !memcmp(dev->dev_addr, ha, dev->addr_len)) - return dev; - - return NULL; -} - -EXPORT_SYMBOL(dev_getbyhwaddr); - -struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) -{ - struct net_device *dev; - - ASSERT_RTNL(); - for_each_netdev(net, dev) - if (dev->type == type) - return dev; - - return NULL; -} - -EXPORT_SYMBOL(__dev_getfirstbyhwtype); - -struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) -{ - struct net_device *dev; - - rtnl_lock(); - dev = __dev_getfirstbyhwtype(net, type); - if (dev) - dev_hold(dev); - rtnl_unlock(); - return dev; -} - -EXPORT_SYMBOL(dev_getfirstbyhwtype); - -/** - * dev_get_by_flags - find any device with given flags - * @net: the applicable net namespace - * @if_flags: IFF_* values - * @mask: bitmask of bits in if_flags to check - * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. - */ - -struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) -{ - struct net_device *dev, *ret; - - ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); - ret = dev; - break; - } - } - read_unlock(&dev_base_lock); - return ret; -} - -/** - * dev_valid_name - check if name is okay for network device - * @name: name string - * - * Network device names need to be valid file names to - * to allow sysfs to work. We also disallow any kind of - * whitespace. 
- */ -int dev_valid_name(const char *name) -{ - if (*name == '\0') - return 0; - if (strlen(name) >= IFNAMSIZ) - return 0; - if (!strcmp(name, ".") || !strcmp(name, "..")) - return 0; - - while (*name) { - if (*name == '/' || isspace(*name)) - return 0; - name++; - } - return 1; -} - -/** - * __dev_alloc_name - allocate a name for a device - * @net: network namespace to allocate the device name in - * @name: name format string - * @buf: scratch buffer and result name string - * - * Passed a format string - eg "lt%d" it will try and find a suitable - * id. It scans list of devices to build up a free map, then chooses - * the first empty slot. The caller must hold the dev_base or rtnl lock - * while allocating the name and adding the device in order to avoid - * duplicates. - * Limited to bits_per_byte * page size devices (ie 32K on most platforms). - * Returns the number of the unit assigned or a negative errno code. - */ - -static int __dev_alloc_name(struct net *net, const char *name, char *buf) -{ - int i = 0; - const char *p; - const int max_netdevices = 8*PAGE_SIZE; - unsigned long *inuse; - struct net_device *d; - - p = strnchr(name, IFNAMSIZ-1, '%'); - if (p) { - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters. - */ - if (p[1] != 'd' || strchr(p + 2, '%')) - return -EINVAL; - - /* Use one page as a bit array of possible slots */ - inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); - if (!inuse) - return -ENOMEM; - - for_each_netdev(net, d) { - if (!sscanf(d->name, name, &i)) - continue; - if (i < 0 || i >= max_netdevices) - continue; - - /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, d->name, IFNAMSIZ)) - set_bit(i, inuse); - } - - i = find_first_zero_bit(inuse, max_netdevices); - free_page((unsigned long) inuse); - } - - snprintf(buf, IFNAMSIZ, name, i); - if (!__dev_get_by_name(net, buf)) - return i; - - /* It is possible to run out of possible slots - * when the name is long and there isn't enough space left - * for the digits, or if all bits are used. - */ - return -ENFILE; -} - -/** - * dev_alloc_name - allocate a name for a device - * @dev: device - * @name: name format string - * - * Passed a format string - eg "lt%d" it will try and find a suitable - * id. It scans list of devices to build up a free map, then chooses - * the first empty slot. The caller must hold the dev_base or rtnl lock - * while allocating the name and adding the device in order to avoid - * duplicates. - * Limited to bits_per_byte * page size devices (ie 32K on most platforms). - * Returns the number of the unit assigned or a negative errno code. - */ - -int dev_alloc_name(struct net_device *dev, const char *name) -{ - char buf[IFNAMSIZ]; - struct net *net; - int ret; - - BUG_ON(!dev_net(dev)); - net = dev_net(dev); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; -} - - -/** - * dev_change_name - change name of a device - * @dev: device - * @newname: name (or format string) must be at least IFNAMSIZ - * - * Change name of a device, can pass format strings "eth%d". - * for wildcarding. 
- */ -int dev_change_name(struct net_device *dev, const char *newname) -{ - char oldname[IFNAMSIZ]; - int err = 0; - int ret; - struct net *net; - - ASSERT_RTNL(); - BUG_ON(!dev_net(dev)); - - net = dev_net(dev); - if (dev->flags & IFF_UP) - return -EBUSY; - - if (!dev_valid_name(newname)) - return -EINVAL; - - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) - return 0; - - memcpy(oldname, dev->name, IFNAMSIZ); - - if (strchr(newname, '%')) { - err = dev_alloc_name(dev, newname); - if (err < 0) - return err; - } - else if (__dev_get_by_name(net, newname)) - return -EEXIST; - else - strlcpy(dev->name, newname, IFNAMSIZ); - -rollback: - /* For now only devices in the initial network namespace - * are in sysfs. - */ - if (net == &init_net) { - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; - } - } - - write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - write_unlock_bh(&dev_base_lock); - - ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); - ret = notifier_to_errno(ret); - - if (ret) { - if (err) { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", - dev->name, ret); - } else { - err = ret; - memcpy(dev->name, oldname, IFNAMSIZ); - goto rollback; - } - } - - return err; -} - -/** - * dev_set_alias - change ifalias of a device - * @dev: device - * @alias: name up to IFALIASZ - * @len: limit of bytes to copy from info - * - * Set ifalias for a device, - */ -int dev_set_alias(struct net_device *dev, const char *alias, size_t len) -{ - ASSERT_RTNL(); - - if (len >= IFALIASZ) - return -EINVAL; - - if (!len) { - if (dev->ifalias) { - kfree(dev->ifalias); - dev->ifalias = NULL; - } - return 0; - } - - dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); - if (!dev->ifalias) - return -ENOMEM; - - strlcpy(dev->ifalias, alias, len+1); - return len; -} - - -/** - * netdev_features_change - device changes features - * @dev: device to cause notification - * - * Called to indicate a device has changed features. - */ -void netdev_features_change(struct net_device *dev) -{ - call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); -} -EXPORT_SYMBOL(netdev_features_change); - -/** - * netdev_state_change - device changes state - * @dev: device to cause notification - * - * Called to indicate a device has changed state. This function calls - * the notifier chains for netdev_chain and sends a NEWLINK message - * to the routing socket. - */ -void netdev_state_change(struct net_device *dev) -{ - if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); - } -} - -void netdev_bonding_change(struct net_device *dev) -{ - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); -} -EXPORT_SYMBOL(netdev_bonding_change); - -/** - * dev_load - load a network module - * @net: the applicable net namespace - * @name: name of interface - * - * If a network interface is not present and the process has suitable - * privileges this function loads the module. If module loading is not - * available in this kernel then it becomes a nop. - */ - -void dev_load(struct net *net, const char *name) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - read_unlock(&dev_base_lock); - - if (!dev && capable(CAP_SYS_MODULE)) - request_module("%s", name); -} - -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * - * Takes a device from down to up state. 
The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev) -{ - const struct net_device_ops *ops = dev->netdev_ops; - int ret = 0; - - ASSERT_RTNL(); - - /* - * Is it already up? - */ - - if (dev->flags & IFF_UP) - return 0; - - /* - * Is it even present? - */ - if (!netif_device_present(dev)) - return -ENODEV; - - /* - * Call device private open method - */ - set_bit(__LINK_STATE_START, &dev->state); - - if (ops->ndo_validate_addr) - ret = ops->ndo_validate_addr(dev); - - if (!ret && ops->ndo_open) - ret = ops->ndo_open(dev); - - /* - * If it went open OK then: - */ - - if (ret) - clear_bit(__LINK_STATE_START, &dev->state); - else { - /* - * Set the flags. - */ - dev->flags |= IFF_UP; - - /* - * Enable NET_DMA - */ - net_dmaengine_get(); - - /* - * Initialize multicasting status - */ - dev_set_rx_mode(dev); - - /* - * Wakeup transmit queue engine - */ - dev_activate(dev); - - /* - * ... and announce new interface. - */ - call_netdevice_notifiers(NETDEV_UP, dev); - } - - return ret; -} - -/** - * dev_close - shutdown an interface. - * @dev: device to shutdown - * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. - */ -int dev_close(struct net_device *dev) -{ - const struct net_device_ops *ops = dev->netdev_ops; - ASSERT_RTNL(); - - might_sleep(); - - if (!(dev->flags & IFF_UP)) - return 0; - - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - - clear_bit(__LINK_STATE_START, &dev->state); - - /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(). - * - * dev->stop() will invoke napi_disable() on all of it's - * napi_struct instances on this device. - */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ - - dev_deactivate(dev); - - /* - * Call the device specific close. This cannot fail. - * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ - if (ops->ndo_stop) - ops->ndo_stop(dev); - - /* - * Device is now down. - */ - - dev->flags &= ~IFF_UP; - - /* - * Tell people we are down - */ - call_netdevice_notifiers(NETDEV_DOWN, dev); - - /* - * Shutdown NET_DMA - */ - net_dmaengine_put(); - - return 0; -} - - -/** - * dev_disable_lro - disable Large Receive Offload on a device - * @dev: device - * - * Disable Large Receive Offload (LRO) on a net device. Must be - * called under RTNL. This is needed if received packets may be - * forwarded to another interface. - */ -void dev_disable_lro(struct net_device *dev) -{ - if (dev->ethtool_ops && dev->ethtool_ops->get_flags && - dev->ethtool_ops->set_flags) { - u32 flags = dev->ethtool_ops->get_flags(dev); - if (flags & ETH_FLAG_LRO) { - flags &= ~ETH_FLAG_LRO; - dev->ethtool_ops->set_flags(dev, flags); - } - } - WARN_ON(dev->features & NETIF_F_LRO); -} -EXPORT_SYMBOL(dev_disable_lro); - - -static int dev_boot_phase = 1; - -/* - * Device change register/unregister. These are not inline or static - * as we export them to the world. 
- */ - -/** - * register_netdevice_notifier - register a network notifier block - * @nb: notifier - * - * Register a notifier to be called when network device events occur. - * The notifier passed is linked into the kernel structures and must - * not be reused until it has been unregistered. A negative errno code - * is returned on a failure. - * - * When registered all registration and up events are replayed - * to the new notifier to allow device to have a race free - * view of the network device list. - */ - -int register_netdevice_notifier(struct notifier_block *nb) -{ - struct net_device *dev; - struct net_device *last; - struct net *net; - int err; - - rtnl_lock(); - err = raw_notifier_chain_register(&netdev_chain, nb); - if (err) - goto unlock; - if (dev_boot_phase) - goto unlock; - for_each_net(net) { - for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); - err = notifier_to_errno(err); - if (err) - goto rollback; - - if (!(dev->flags & IFF_UP)) - continue; - - nb->notifier_call(nb, NETDEV_UP, dev); - } - } - -unlock: - rtnl_unlock(); - return err; - -rollback: - last = dev; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev == last) - break; - - if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); - } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - } - } - - raw_notifier_chain_unregister(&netdev_chain, nb); - goto unlock; -} - -/** - * unregister_netdevice_notifier - unregister a network notifier block - * @nb: notifier - * - * Unregister a notifier previously registered by - * register_netdevice_notifier(). The notifier is unlinked into the - * kernel structures and may then be reused. A negative errno code - * is returned on a failure. - */ - -int unregister_netdevice_notifier(struct notifier_block *nb) -{ - int err; - - rtnl_lock(); - err = raw_notifier_chain_unregister(&netdev_chain, nb); - rtnl_unlock(); - return err; -} - -/** - * call_netdevice_notifiers - call all network notifier blocks - * @val: value passed unmodified to notifier function - * @dev: net_device pointer passed unmodified to notifier function - * - * Call all network notifier blocks. Parameters and return value - * are as for raw_notifier_call_chain(). - */ - -int call_netdevice_notifiers(unsigned long val, struct net_device *dev) -{ - return raw_notifier_call_chain(&netdev_chain, val, dev); -} - -/* When > 0 there are consumers of rx skb time stamps */ -static atomic_t netstamp_needed = ATOMIC_INIT(0); - -void net_enable_timestamp(void) -{ - atomic_inc(&netstamp_needed); -} - -void net_disable_timestamp(void) -{ - atomic_dec(&netstamp_needed); -} - -static inline void net_timestamp(struct sk_buff *skb) -{ - if (atomic_read(&netstamp_needed)) - __net_timestamp(skb); - else - skb->tstamp.tv64 = 0; -} - -/* - * Support routine. Sends outgoing frames to any network - * taps currently in use. 
- */ - -static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) -{ - struct packet_type *ptype; - - net_timestamp(skb); - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { - /* Never send packets back to the socket - * they originated from - MvS (miquels@drinkel.ow.org) - */ - if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { - struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; - - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. - */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { - if (net_ratelimit()) - printk(KERN_CRIT "protocol %04x is " - "buggy, dev %s\n", - skb2->protocol, dev->name); - skb_reset_network_header(skb2); - } - - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype, skb->dev); - } - } - rcu_read_unlock(); -} - - -static inline void __netif_reschedule(struct Qdisc *q) -{ - struct softnet_data *sd; - unsigned long flags; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - q->next_sched = sd->output_queue; - sd->output_queue = q; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); -} - -void __netif_schedule(struct Qdisc *q) -{ - if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) - __netif_reschedule(q); -} -EXPORT_SYMBOL(__netif_schedule); - -void dev_kfree_skb_irq(struct sk_buff *skb) -{ - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(dev_kfree_skb_irq); - -void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} -EXPORT_SYMBOL(dev_kfree_skb_any); - - -/** - * netif_device_detach - mark device as removed - * @dev: network device - * - * Mark device as removed from system and therefore no longer available. - */ -void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} -EXPORT_SYMBOL(netif_device_detach); - -/** - * netif_device_attach - mark device as attached - * @dev: network device - * - * Mark device as attached from system and restart if needed. 
- */ -void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} -EXPORT_SYMBOL(netif_device_attach); - -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_IP_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_IPV6_CSUM) && - protocol == htons(ETH_P_IPV6))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - if (can_checksum_protocol(dev->features, skb->protocol)) - return true; - - if (skb->protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - if (can_checksum_protocol(dev->features & dev->vlan_features, - veh->h_vlan_encapsulated_proto)) - return true; - } - - return false; -} - -/* - * Invalidate hardware checksum when packet is to be mangled, and - * complete checksum manually on outgoing path. - */ -int skb_checksum_help(struct sk_buff *skb) -{ - __wsum csum; - int ret = 0, offset; - - if (skb->ip_summed == CHECKSUM_COMPLETE) - goto out_set_summed; - - if (unlikely(skb_shinfo(skb)->gso_size)) { - /* Let GSO fix up the checksum. */ - goto out_set_summed; - } - - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset >= skb_headlen(skb)); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - - offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (ret) - goto out; - } - - *(__sum16 *)(skb->data + offset) = csum_fold(csum); -out_set_summed: - skb->ip_summed = CHECKSUM_NONE; -out: - return ret; -} - -/** - * skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type *ptype; - __be16 type = skb->protocol; - int err; - - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - __skb_pull(skb, skb->mac_len); - - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - struct net_device *dev = skb->dev; - struct ethtool_drvinfo info = {}; - - if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) - dev->ethtool_ops->get_drvinfo(dev, &info); - - WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " - "ip_summed=%d", - info.driver, dev ? dev->features : 0L, - skb->sk ? 
skb->sk->sk_route_caps : 0L, - skb->len, skb->data_len, skb->ip_summed); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); - segs = ERR_PTR(err); - if (err || skb_gso_ok(skb, features)) - break; - __skb_push(skb, (skb->data - - skb_network_header(skb))); - } - segs = ptype->gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - __skb_push(skb, skb->data - skb_mac_header(skb)); - - return segs; -} - -EXPORT_SYMBOL(skb_gso_segment); - -/* Take action when hardware reception checksum errors are detected. */ -#ifdef CONFIG_BUG -void netdev_rx_csum_fault(struct net_device *dev) -{ - if (net_ratelimit()) { - printk(KERN_ERR "%s: hw csum failure.\n", - dev ? dev->name : "<unknown>"); - dump_stack(); - } -} -EXPORT_SYMBOL(netdev_rx_csum_fault); -#endif - -/* Actually, we should eliminate this check as soon as we know, that: - * 1. IOMMU is present and allows to map all the memory. - * 2. No high memory really exists on this machine. - */ - -static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) -{ -#ifdef CONFIG_HIGHMEM - int i; - - if (dev->features & NETIF_F_HIGHDMA) - return 0; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) - return 1; - -#endif - return 0; -} - -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. 
*/ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - prefetch(&dev->netdev_ops->ndo_start_xmit); - if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - - if (netif_needs_gso(dev, skb)) { - if (unlikely(dev_gso_segment(skb))) - goto out_kfree_skb; - if (skb->next) - goto gso; - } - - return ops->ndo_start_xmit(skb, dev); - } - -gso: - do { - struct sk_buff *nskb = skb->next; - int rc; - - skb->next = nskb->next; - nskb->next = NULL; - rc = ops->ndo_start_xmit(nskb, dev); - if (unlikely(rc)) { - nskb->next = skb->next; - skb->next = nskb; - return rc; - } - if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) - return NETDEV_TX_BUSY; - } while (skb->next); - - skb->destructor = DEV_GSO_CB(skb)->destructor; - -out_kfree_skb: - kfree_skb(skb); - return 0; -} - -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; - -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) -{ - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; - } - - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } - - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); - - return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); -} - -static struct netdev_queue *dev_pick_tx(struct net_device *dev, - struct sk_buff *skb) -{ - const struct net_device_ops *ops = dev->netdev_ops; - u16 queue_index = 0; - - if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb); - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); -} - -/** - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling - * this function. The function can be called from an interrupt. - * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. - * - * ----------------------------------------------------------------------------------- - * I notice this method can also return errors from the queue disciplines, - * including NET_XMIT_DROP, which is a positive value. So, errors can also - * be positive. - * - * Regardless of the return value, the skb is consumed, so it is currently - * difficult to retry a send to this method. 
(You can bump the ref count - * before sending to hold a reference for retry if you are careful.) - * - * When calling this method, interrupts MUST be enabled. This is because - * the BH enable code must have IRQs enabled so that it will not deadlock. - * --BLG - */ -int dev_queue_xmit(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - int rc = -ENOMEM; - - /* GSO will handle the following emulations directly. */ - if (netif_needs_gso(dev, skb)) - goto gso; - - if (skb_shinfo(skb)->frag_list && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* If packet is not checksummed and device does not support - * checksumming for this protocol, complete checksumming here. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); - if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) - goto out_kfree_skb; - } - -gso: - /* Disable soft irqs for various locks below. Also - * stops preemption for RCU. - */ - rcu_read_lock_bh(); - - txq = dev_pick_tx(dev, skb); - -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); -#endif - - /* The device has no queue. Common case for software devices: - loopback, all the sorts of tunnels... - - Really, it is unlikely that netif_tx_lock protection is necessary - here. (f.e. loopback and IP tunnels are clean ignoring statistics - counters.) - However, it is possible, that they rely on protection - made by us here. - - Check this and shot the lock. It is not prone from deadlocks. - Either shot noqueue qdisc, it is even simpler 8) - */ - if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); /* ok because BHs are off */ - - if (txq->xmit_lock_owner != cpu) { - - HARD_TX_LOCK(dev, txq, cpu); - - if (!netif_tx_queue_stopped(txq)) { - rc = 0; - if (!dev_hard_start_xmit(skb, dev, txq)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } - } - HARD_TX_UNLOCK(dev, txq); - if (net_ratelimit()) - printk(KERN_CRIT "Virtual device %s asks to " - "queue packet!\n", dev->name); - } else { - /* Recursion is detected! It is possible, - * unfortunately */ - if (net_ratelimit()) - printk(KERN_CRIT "Dead loop on virtual device " - "%s, fix it urgently!\n", dev->name); - } - } - - rc = -ENETDOWN; - rcu_read_unlock_bh(); - -out_kfree_skb: - kfree_skb(skb); - return rc; -out: - rcu_read_unlock_bh(); - return rc; -} - - -/*======================================================================= - Receiver routines - =======================================================================*/ - -int netdev_max_backlog __read_mostly = 1000; -int netdev_budget __read_mostly = 300; -int weight_p __read_mostly = 64; /* old backlog weight */ - -DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; - - -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. 
- * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) -{ -#ifndef DDE_LINUX - struct softnet_data *queue; - unsigned long flags; - - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - - if (!skb->tstamp.tv64) - net_timestamp(skb); - - /* - * The code is rearranged so that the path is the most - * short when CPU is congested, but is still operating. - */ - local_irq_save(flags); - queue = &__get_cpu_var(softnet_data); - - __get_cpu_var(netdev_rx_stat).total++; - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { -enqueue: - dev_hold(skb->dev); - __skb_queue_tail(&queue->input_pkt_queue, skb); - local_irq_restore(flags); - return NET_RX_SUCCESS; - } - - napi_schedule(&queue->backlog); - goto enqueue; - } - - __get_cpu_var(netdev_rx_stat).dropped++; - local_irq_restore(flags); - - kfree_skb(skb); - return NET_RX_DROP; -#else /* DDE_LINUX */ - /* call our callback fn */ - return l4dde26_do_rx_callback(skb); -#endif -} - -int netif_rx_ni(struct sk_buff *skb) -{ - int err; - - preempt_disable(); - err = netif_rx(skb); - if (local_softirq_pending()) - do_softirq(); - preempt_enable(); - - return err; -} - -EXPORT_SYMBOL(netif_rx_ni); - -static void net_tx_action(struct softirq_action *h) -{ - struct softnet_data *sd = &__get_cpu_var(softnet_data); - - if (sd->completion_queue) { - struct sk_buff *clist; - - local_irq_disable(); - clist = sd->completion_queue; - sd->completion_queue = NULL; - local_irq_enable(); - - while (clist) { - struct sk_buff *skb = clist; - clist = clist->next; - - WARN_ON(atomic_read(&skb->users)); - __kfree_skb(skb); - } - } - - if (sd->output_queue) { - struct Qdisc *head; - - local_irq_disable(); - head = sd->output_queue; - sd->output_queue = NULL; - local_irq_enable(); - - while (head) { - struct Qdisc *q = head; - spinlock_t *root_lock; - - head = head->next_sched; - - root_lock = qdisc_lock(q); - if (spin_trylock(root_lock)) { - smp_mb__before_clear_bit(); - clear_bit(__QDISC_STATE_SCHED, - &q->state); - qdisc_run(q); - spin_unlock(root_lock); - } else { - if (!test_bit(__QDISC_STATE_DEACTIVATED, - &q->state)) { - __netif_reschedule(q); - } else { - smp_mb__before_clear_bit(); - clear_bit(__QDISC_STATE_SCHED, - &q->state); - } - } - } - } -} - -static inline int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) -{ - atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -} - -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -/* These hooks defined here for ATM */ -struct net_bridge; -struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, - unsigned char *addr); -void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; - -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. 
- */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - if (skb->dev->macvlan_port == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#ifdef CONFIG_NET_CLS_ACT -/* TODO: Maybe we should just force sch_ingress to be compiled in - * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions - * a compare and 2 stores extra right now if we dont have it on - * but have CONFIG_NET_CLS_ACT - * NOTE: This doesnt stop any functionality; if you dont have - * the ingress scheduler, you just cant add policies on ingress. - * - */ -static int ing_filter(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); - struct netdev_queue *rxq; - int result = TC_ACT_OK; - struct Qdisc *q; - - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - - rxq = &dev->rx_queue; - - q = rxq->qdisc; - if (q != &noop_qdisc) { - spin_lock(qdisc_lock(q)); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - result = qdisc_enqueue_root(skb, q); - spin_unlock(qdisc_lock(q)); - } - - return result; -} - -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) -{ - if (skb->dev->rx_queue.qdisc == &noop_qdisc) - goto out; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } - - switch (ing_filter(skb)) { - case TC_ACT_SHOT: - case TC_ACT_STOLEN: - kfree_skb(skb); - return NULL; - } - -out: - skb->tc_verd = 0; - return skb; -} -#endif - -/* - * netif_nit_deliver - deliver received packets to network taps - * @skb: buffer - * - * This function is used to deliver incoming packets to network - * taps. It should be used when the normal netif_receive_skb path - * is bypassed, for example because of VLAN acceleration. 
- */ -void netif_nit_deliver(struct sk_buff *skb) -{ - struct packet_type *ptype; - - if (list_empty(&ptype_all)) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) - deliver_skb(skb, ptype, skb->dev); - } - rcu_read_unlock(); -} - -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) -{ -#ifndef DDE_LINUX - struct packet_type *ptype, *pt_prev; - struct net_device *orig_dev; - struct net_device *null_or_orig; - int ret = NET_RX_DROP; - __be16 type; - - if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) - return NET_RX_SUCCESS; - - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - - if (!skb->tstamp.tv64) - net_timestamp(skb); - - if (!skb->iif) - skb->iif = skb->dev->ifindex; - - null_or_orig = NULL; - orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) - null_or_orig = orig_dev; /* deliver only exact match */ - else - skb->dev = orig_dev->master; - } - - __get_cpu_var(netdev_rx_stat).total++; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; - - pt_prev = NULL; - - rcu_read_lock(); - -#ifdef CONFIG_NET_CLS_ACT - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - goto ncls; - } -#endif - - list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } - } - -#ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; -ncls: -#endif - - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } - } - - if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); - } else { - kfree_skb(skb); - /* Jamal, now you will not be able to escape explaining - * to me how you were going to use this.
:-) - */ - ret = NET_RX_DROP; - } - -out: - rcu_read_unlock(); - return ret; -#else /* DDE_LINUX */ - /* call our callback fn */ - return l4dde26_do_rx_callback(skb); -#endif -} - - -/* Network device is going away, flush any packets still pending */ -static void flush_backlog(void *arg) -{ - struct net_device *dev = arg; - struct softnet_data *queue = &__get_cpu_var(softnet_data); - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) - if (skb->dev == dev) { - __skb_unlink(skb, &queue->input_pkt_queue); - kfree_skb(skb); - } -} - -static int napi_gro_complete(struct sk_buff *skb) -{ - struct packet_type *ptype; - __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; - int err = -ENOENT; - - if (NAPI_GRO_CB(skb)->count == 1) - goto out; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_complete) - continue; - - err = ptype->gro_complete(skb); - break; - } - rcu_read_unlock(); - - if (err) { - WARN_ON(&ptype->list == head); - kfree_skb(skb); - return NET_RX_SUCCESS; - } - -out: - skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); - return netif_receive_skb(skb); -} - -void napi_gro_flush(struct napi_struct *napi) -{ - struct sk_buff *skb, *next; - - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - napi_gro_complete(skb); - } - - napi->gro_list = NULL; -} -EXPORT_SYMBOL(napi_gro_flush); - -int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct packet_type *ptype; - __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; - int count = 0; - int same_flow; - int mac_len; - int free; - - if (!(skb->dev->features & NETIF_F_GRO)) - goto normal; - - if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) - goto normal; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, head, list) { - struct sk_buff *p; - - if (ptype->type != type || ptype->dev || !ptype->gro_receive) - continue; - - skb_reset_network_header(skb); - mac_len = skb->network_header - skb->mac_header; - skb->mac_len = mac_len; - NAPI_GRO_CB(skb)->same_flow = 0; - NAPI_GRO_CB(skb)->flush = 0; - NAPI_GRO_CB(skb)->free = 0; - - for (p = napi->gro_list; p; p = p->next) { - count++; - - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) - NAPI_GRO_CB(p)->same_flow = 0; - } - - pp = ptype->gro_receive(&napi->gro_list, skb); - break; - } - rcu_read_unlock(); - - if (&ptype->list == head) - goto normal; - - same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; - - if (pp) { - struct sk_buff *nskb = *pp; - - *pp = nskb->next; - nskb->next = NULL; - napi_gro_complete(nskb); - count--; - } - - if (same_flow) - goto ok; - - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); - goto normal; - } - - NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; - skb->next = napi->gro_list; - napi->gro_list = skb; - -ok: - return free; - -normal: - return -1; -} -EXPORT_SYMBOL(dev_gro_receive); - -static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) -{ - struct sk_buff *p; - - for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = 1; - NAPI_GRO_CB(p)->flush = 0; - } - - return dev_gro_receive(napi, skb); -} - -int napi_gro_receive(struct 
napi_struct *napi, struct sk_buff *skb) -{ - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); - - case 1: - kfree_skb(skb); - break; - } - - return NET_RX_SUCCESS; -} -EXPORT_SYMBOL(napi_gro_receive); - -void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) -{ - __skb_pull(skb, skb_headlen(skb)); - skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); - - napi->skb = skb; -} -EXPORT_SYMBOL(napi_reuse_skb); - -struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, - struct napi_gro_fraginfo *info) -{ - struct net_device *dev = napi->dev; - struct sk_buff *skb = napi->skb; - - napi->skb = NULL; - - if (!skb) { - skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); - if (!skb) - goto out; - - skb_reserve(skb, NET_IP_ALIGN); - } - - BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); - - skb->data_len = info->len; - skb->len += info->len; - skb->truesize += info->len; - - if (!pskb_may_pull(skb, ETH_HLEN)) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } - - skb->protocol = eth_type_trans(skb, dev); - - skb->ip_summed = info->ip_summed; - skb->csum = info->csum; - -out: - return skb; -} -EXPORT_SYMBOL(napi_fraginfo_skb); - -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) -{ - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; - - if (!skb) - goto out; - - if (netpoll_receive_skb(skb)) - goto out; - - err = NET_RX_SUCCESS; - - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); - - case 0: - goto out; - } - - napi_reuse_skb(napi, skb); - -out: - return err; -} -EXPORT_SYMBOL(napi_gro_frags); - -static int process_backlog(struct napi_struct *napi, int quota) -{ - int work = 0; - struct softnet_data *queue = &__get_cpu_var(softnet_data); - unsigned long start_time = jiffies; - - napi->weight = weight_p; - do { - struct sk_buff *skb; - - local_irq_disable(); - skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) { - local_irq_enable(); - napi_complete(napi); - goto out; - } - local_irq_enable(); - - napi_gro_receive(napi, skb); - } while (++work < quota && jiffies == start_time); - - napi_gro_flush(napi); - -out: - return work; -} - -/** - * __napi_schedule - schedule for receive - * @n: entry to schedule - * - * The entry's receive function will be scheduled to run - */ -void __napi_schedule(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} -EXPORT_SYMBOL(__napi_schedule); - -void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - BUG_ON(n->gro_list); - - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} -EXPORT_SYMBOL(__napi_complete); - -void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) - return; - - napi_gro_flush(n); - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} -EXPORT_SYMBOL(napi_complete); - -void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), 
int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->gro_list = NULL; - napi->skb = NULL; - napi->poll = poll; - napi->weight = weight; - list_add(&napi->dev_list, &dev->napi_list); - napi->dev = dev; -#ifdef CONFIG_NETPOLL - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} -EXPORT_SYMBOL(netif_napi_add); - -void netif_napi_del(struct napi_struct *napi) -{ - struct sk_buff *skb, *next; - - list_del_init(&napi->dev_list); - kfree_skb(napi->skb); - - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - - napi->gro_list = NULL; -} -EXPORT_SYMBOL(netif_napi_del); - - -static void net_rx_action(struct softirq_action *h) -{ - struct list_head *list = &__get_cpu_var(softnet_data).poll_list; - unsigned long time_limit = jiffies + 2; - int budget = netdev_budget; - void *have; - - local_irq_disable(); - - while (!list_empty(list)) { - struct napi_struct *n; - int work, weight; - - /* If the softirq window is exhausted then punt. - * Allow this to run for 2 jiffies, which allows - * an average latency of 1.5/HZ. - */ - if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) - goto softnet_break; - - local_irq_enable(); - - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - n = list_entry(list->next, struct napi_struct, poll_list); - - have = netpoll_poll_lock(n); - - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) - work = n->poll(n, weight); - - WARN_ON_ONCE(work > weight); - - budget -= work; - - local_irq_disable(); - - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at will. - */ - if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else - list_move_tail(&n->poll_list, list); - } - - netpoll_poll_unlock(have); - } -out: - local_irq_enable(); - -#ifdef CONFIG_NET_DMA - /* - * There may not be any more sk_buffs coming right now, so push - * any pending DMA copies to hardware - */ - dma_issue_pending_all(); -#endif - - return; - -softnet_break: - __get_cpu_var(netdev_rx_stat).time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - goto out; -} - -static gifconf_func_t * gifconf_list [NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t * gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} - - -/* - * Map an interface index to its name (SIOCGIFNAME) - */ - -/* - * We need this ioctl for efficient implementation of the - * if_indextoname() function required by the IPv6 API.
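The NAPI pieces above meet a driver in three places: netif_napi_add() at probe time, a schedule from the RX interrupt, and a poll routine invoked with a budget from net_rx_action(). A hypothetical foo driver sketch (the foo_* helpers and the weight of 64 are illustrative, not from this file):

    /* probe: */
    netif_napi_add(dev, &priv->napi, foo_poll, 64);

    /* RX interrupt: */
    if (napi_schedule_prep(&priv->napi)) {
            foo_disable_rx_irq(priv);               /* assumed helper */
            __napi_schedule(&priv->napi);
    }

    /* poll routine, run by net_rx_action() above: */
    static int foo_poll(struct napi_struct *napi, int budget)
    {
            struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
            int work = 0;

            while (work < budget) {
                    struct sk_buff *skb = foo_next_rx_skb(priv);    /* assumed */

                    if (!skb)
                            break;
                    skb->protocol = eth_type_trans(skb, priv->dev);
                    napi_gro_receive(napi, skb);
                    work++;
            }
            if (work < budget) {
                    napi_complete(napi);
                    foo_enable_rx_irq(priv);        /* assumed helper */
            }
            return work;
    }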
Without - * it, we would have to search all the interfaces to find a - * match. --pb - */ - -static int dev_ifname(struct net *net, struct ifreq __user *arg) -{ - struct net_device *dev; - struct ifreq ifr; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); - if (!dev) { - read_unlock(&dev_base_lock); - return -ENODEV; - } - - strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; -} - -/* - * Perform a SIOCGIFCONF call. This structure will change - * size eventually, and there is nothing I can do about it. - * Thus we will need a 'compatibility mode'. - */ - -static int dev_ifconf(struct net *net, char __user *arg) -{ - struct ifconf ifc; - struct net_device *dev; - char __user *pos; - int len; - int total; - int i; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; - - /* - * Loop over the interfaces, and write an info block for each. - */ - - total = 0; - for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0); - else - done = gifconf_list[i](dev, pos + total, - len - total); - if (done < 0) - return -EFAULT; - total += done; - } - } - } - - /* - * All done. Write the updated control block back to the caller. - */ - ifc.ifc_len = total; - - /* - * Both BSD and Solaris return 0 here, so we do too. - */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; -} - -#ifdef CONFIG_PROC_FS -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. - */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) -{ - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; - - read_lock(&dev_base_lock); - if (!*pos) - return SEQ_START_TOKEN; - - off = 1; - for_each_netdev(net, dev) - if (off++ == *pos) - return dev; - - return NULL; -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) -{ - read_unlock(&dev_base_lock); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - const struct net_device_stats *stats = dev_get_stats(dev); - - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); -} - -/* - * Called from the PROCfs module. 
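From userspace, dev_ifname() above is reached through an ordinary socket ioctl; it is the kernel side of if_indextoname(). An illustrative userspace program:

    #include <net/if.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            struct ifreq ifr;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            memset(&ifr, 0, sizeof(ifr));
            ifr.ifr_ifindex = 1;                    /* look up interface #1 */
            if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
                    printf("ifindex 1 -> %s\n", ifr.ifr_name);
            close(fd);
            return 0;
    }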
This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev - */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct netif_rx_stats *softnet_get_online(loff_t *pos) -{ - struct netif_rx_stats *rc = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - rc = &per_cpu(netdev_rx_stat, *pos); - break; - } else - ++*pos; - return rc; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct netif_rx_stats *s = v; - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision ); - return 0; -} - -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; - -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); -} - -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) -{ - struct packet_type *pt = NULL; - loff_t i = 0; - int t; - - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } - return NULL; -} - -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return *pos ? 
ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); - - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} - -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); - - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } - - return 0; -} - -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) - goto out; - if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) - goto out_dev; - if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - proc_net_remove(net, "ptype"); -out_softnet: - proc_net_remove(net, "softnet_stat"); -out_dev: - proc_net_remove(net, "dev"); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - proc_net_remove(net, "ptype"); - proc_net_remove(net, "softnet_stat"); - proc_net_remove(net, "dev"); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - - -/** - * netdev_set_master - set up master/slave pair - * @slave: slave device - * @master: new master device - * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. 
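The entries walked by the ptype seq_file above are the same packet_type structures netif_receive_skb() delivers to; a tap registered as below (a sketch; dev_add_pack()/dev_remove_pack() are defined earlier in this file) would show up in /proc/net/ptype as an ALL row:

    static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
                       struct packet_type *pt, struct net_device *orig_dev)
    {
            printk(KERN_DEBUG "tap: %u bytes on %s\n", skb->len, dev->name);
            kfree_skb(skb);         /* drop the reference deliver_skb() took */
            return NET_RX_SUCCESS;
    }

    static struct packet_type tap_pt __read_mostly = {
            .type = __constant_htons(ETH_P_ALL),    /* joins the ptype_all list */
            .func = tap_rcv,
    };

    /* hook in with dev_add_pack(&tap_pt); remove with dev_remove_pack(&tap_pt) */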
- */ -int netdev_set_master(struct net_device *slave, struct net_device *master) -{ - struct net_device *old = slave->master; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } - - slave->master = master; - - synchronize_net(); - - if (old) - dev_put(old); - - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; -} - -static void dev_change_rx_flags(struct net_device *dev, int flags) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) - ops->ndo_change_rx_flags(dev, flags); -} - -static int __dev_set_promiscuity(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - uid_t uid; - gid_t gid; - - ASSERT_RTNL(); - - dev->flags |= IFF_PROMISC; - dev->promiscuity += inc; - if (dev->promiscuity == 0) { - /* - * Avoid overflow. - * If inc causes overflow, untouch promisc and return error. - */ - if (inc < 0) - dev->flags &= ~IFF_PROMISC; - else { - dev->promiscuity -= inc; - printk(KERN_WARNING "%s: promiscuity touches roof, " - "set promiscuity failed, promiscuity feature " - "of device might be broken.\n", dev->name); - return -EOVERFLOW; - } - } - if (dev->flags != old_flags) { - printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags & IFF_PROMISC) ? "entered" : - "left"); - if (audit_enabled) { - current_uid_gid(&uid, &gid); - audit_log(current->audit_context, GFP_ATOMIC, - AUDIT_ANOM_PROMISCUOUS, - "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", - dev->name, (dev->flags & IFF_PROMISC), - (old_flags & IFF_PROMISC), - audit_get_loginuid(current), - uid, gid, - audit_get_sessionid(current)); - } - - dev_change_rx_flags(dev, IFF_PROMISC); - } - return 0; -} - -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - int err; - - err = __dev_set_promiscuity(dev, inc); - if (err < 0) - return err; - if (dev->flags != old_flags) - dev_set_rx_mode(dev); - return err; -} - -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - unsigned short old_flags = dev->flags; - - ASSERT_RTNL(); - - dev->flags |= IFF_ALLMULTI; - dev->allmulti += inc; - if (dev->allmulti == 0) { - /* - * Avoid overflow. - * If inc causes overflow, untouch allmulti and return error. 
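Because the promiscuity and allmulti counts above are reference counts rather than flags, each user pairs an increment with a later decrement, so several independent users can coexist. Roughly what a packet-capture implementation does:

    rtnl_lock();
    err = dev_set_promiscuity(dev, 1);      /* capture socket opened */
    rtnl_unlock();

    /* ... packets flow; other taps may stack further increments ... */

    rtnl_lock();
    dev_set_promiscuity(dev, -1);           /* capture socket closed */
    rtnl_unlock();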
- */ - if (inc < 0) - dev->flags &= ~IFF_ALLMULTI; - else { - dev->allmulti -= inc; - printk(KERN_WARNING "%s: allmulti touches roof, " - "set allmulti failed, allmulti feature of " - "device might be broken.\n", dev->name); - return -EOVERFLOW; - } - } - if (dev->flags ^ old_flags) { - dev_change_rx_flags(dev, IFF_ALLMULTI); - dev_set_rx_mode(dev); - } - return 0; -} - -/* - * Upload unicast and multicast address lists to device and - * configure RX filtering. When the device doesn't support unicast - * filtering it is put in promiscuous mode while unicast addresses - * are present. - */ -void __dev_set_rx_mode(struct net_device *dev) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - /* dev_open will call this function so the list will stay sane. */ - if (!(dev->flags&IFF_UP)) - return; - - if (!netif_device_present(dev)) - return; - - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { - /* Unicast addresses changes may only happen under the rtnl, - * therefore calling __dev_set_promiscuity here is safe. - */ - if (dev->uc_count > 0 && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1); - dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1); - dev->uc_promisc = 0; - } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); - } -} - -void dev_set_rx_mode(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); -} - -int __dev_addr_delete(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (; (da = *list) != NULL; list = &da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - alen == da->da_addrlen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 0; - if (old_glbl == 0) - break; - } - if (--da->da_users) - return 0; - - *list = da->next; - kfree(da); - (*count)--; - return 0; - } - } - return -ENOENT; -} - -int __dev_addr_add(struct dev_addr_list **list, int *count, - void *addr, int alen, int glbl) -{ - struct dev_addr_list *da; - - for (da = *list; da != NULL; da = da->next) { - if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && - da->da_addrlen == alen) { - if (glbl) { - int old_glbl = da->da_gusers; - da->da_gusers = 1; - if (old_glbl) - return 0; - } - da->da_users++; - return 0; - } - } - - da = kzalloc(sizeof(*da), GFP_ATOMIC); - if (da == NULL) - return -ENOMEM; - memcpy(da->da_addr, addr, alen); - da->da_addrlen = alen; - da->da_users = 1; - da->da_gusers = glbl ? 1 : 0; - da->next = *list; - *list = da; - (*count)++; - return 0; -} - -/** - * dev_unicast_delete - Release secondary unicast address. - * @dev: device - * @addr: address to delete - * @alen: length of @addr - * - * Release reference to a secondary unicast address and remove it - * from the device if the reference count drops to zero. - * - * The caller must hold the rtnl_mutex. - */ -int dev_unicast_delete(struct net_device *dev, void *addr, int alen) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_delete); - -/** - * dev_unicast_add - add a secondary unicast address - * @dev: device - * @addr: address to add - * @alen: length of @addr - * - * Add a secondary unicast address to the device or increase - * the reference count if it already exists. 
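dev_unicast_delete() above and dev_unicast_add() below are refcounted in the same spirit; a protocol that needs the NIC to accept one extra station address would do the following under rtnl (secondary_mac is an assumed buffer of ETH_ALEN bytes):

    rtnl_lock();
    err = dev_unicast_add(dev, secondary_mac, ETH_ALEN);
    rtnl_unlock();

    /* ... and symmetrically when done: */
    rtnl_lock();
    dev_unicast_delete(dev, secondary_mac, ETH_ALEN);
    rtnl_unlock();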
- * - * The caller must hold the rtnl_mutex. - */ -int dev_unicast_add(struct net_device *dev, void *addr, int alen) -{ - int err; - - ASSERT_RTNL(); - - netif_addr_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); - if (!err) - __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); - return err; -} -EXPORT_SYMBOL(dev_unicast_add); - -int __dev_addr_sync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - int err = 0; - - da = *from; - while (da != NULL) { - next = da->next; - if (!da->da_synced) { - err = __dev_addr_add(to, to_count, - da->da_addr, da->da_addrlen, 0); - if (err < 0) - break; - da->da_synced = 1; - da->da_users++; - } else if (da->da_users == 1) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } - return err; -} - -void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, - struct dev_addr_list **from, int *from_count) -{ - struct dev_addr_list *da, *next; - - da = *from; - while (da != NULL) { - next = da->next; - if (da->da_synced) { - __dev_addr_delete(to, to_count, - da->da_addr, da->da_addrlen, 0); - da->da_synced = 0; - __dev_addr_delete(from, from_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } -} - -/** - * dev_unicast_sync - Synchronize device's unicast list to another device - * @to: destination device - * @from: source device - * - * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_addr_lock_bh. - * - * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. - */ -int dev_unicast_sync(struct net_device *to, struct net_device *from) -{ - int err = 0; - - netif_addr_lock_bh(to); - err = __dev_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - if (!err) - __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); - return err; -} -EXPORT_SYMBOL(dev_unicast_sync); - -/** - * dev_unicast_unsync - Remove synchronized addresses from the destination device - * @to: destination device - * @from: source device - * - * Remove all addresses that were added to the destination device by - * dev_unicast_sync(). This function is intended to be called from the - * dev->stop function of layered software devices. - */ -void dev_unicast_unsync(struct net_device *to, struct net_device *from) -{ - netif_addr_lock_bh(from); - netif_addr_lock(to); - - __dev_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - __dev_set_rx_mode(to); - - netif_addr_unlock(to); - netif_addr_unlock_bh(from); -} -EXPORT_SYMBOL(dev_unicast_unsync); - -static void __dev_addr_discard(struct dev_addr_list **list) -{ - struct dev_addr_list *tmp; - - while (*list != NULL) { - tmp = *list; - *list = tmp->next; - if (tmp->da_users > tmp->da_gusers) - printk("__dev_addr_discard: address leakage! " - "da_users=%d\n", tmp->da_users); - kfree(tmp); - } -} - -static void dev_addr_discard(struct net_device *dev) -{ - netif_addr_lock_bh(dev); - - __dev_addr_discard(&dev->uc_list); - dev->uc_count = 0; - - __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; - - netif_addr_unlock_bh(dev); -} - -/** - * dev_get_flags - get flags reported to userspace - * @dev: device - * - * Get the combination of flag bits exported through APIs to userspace. 
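dev_unicast_sync()/dev_unicast_unsync() above exist for stacked devices: the upper device's own rx-mode handler simply forwards its address lists to the lower device. A sketch (the stacked_priv()/lower naming is assumed; dev_mc_sync() lives in dev_mcast.c):

    static void stacked_set_rx_mode(struct net_device *dev)
    {
            struct net_device *lower = stacked_priv(dev)->lower;

            dev_unicast_sync(lower, dev);
            dev_mc_sync(lower, dev);        /* multicast counterpart */
    }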
- */ -unsigned dev_get_flags(const struct net_device *dev) -{ - unsigned flags; - - flags = (dev->flags & ~(IFF_PROMISC | - IFF_ALLMULTI | - IFF_RUNNING | - IFF_LOWER_UP | - IFF_DORMANT)) | - (dev->gflags & (IFF_PROMISC | - IFF_ALLMULTI)); - - if (netif_running(dev)) { - if (netif_oper_up(dev)) - flags |= IFF_RUNNING; - if (netif_carrier_ok(dev)) - flags |= IFF_LOWER_UP; - if (netif_dormant(dev)) - flags |= IFF_DORMANT; - } - - return flags; -} - -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned flags) -{ - int ret, changes; - int old_flags = dev->flags; - - ASSERT_RTNL(); - - /* - * Set the flags on our device. - */ - - dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | - IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | - IFF_AUTOMEDIA)) | - (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | - IFF_ALLMULTI)); - - /* - * Load in the correct multicast list now the flags have changed. - */ - - if ((old_flags ^ flags) & IFF_MULTICAST) - dev_change_rx_flags(dev, IFF_MULTICAST); - - dev_set_rx_mode(dev); - - /* - * Have we downed the interface. We handle IFF_UP ourselves - * according to user attempts to set it, rather than blindly - * setting it. - */ - - ret = 0; - if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ - ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); - - if (!ret) - dev_set_rx_mode(dev); - } - - if (dev->flags & IFF_UP && - ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | - IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); - - if ((flags ^ dev->gflags) & IFF_PROMISC) { - int inc = (flags & IFF_PROMISC) ? +1 : -1; - dev->gflags ^= IFF_PROMISC; - dev_set_promiscuity(dev, inc); - } - - /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI - is important. Some (broken) drivers set IFF_PROMISC, when - IFF_ALLMULTI is requested not asking us and not reporting. - */ - if ((flags ^ dev->gflags) & IFF_ALLMULTI) { - int inc = (flags & IFF_ALLMULTI) ? +1 : -1; - dev->gflags ^= IFF_ALLMULTI; - dev_set_allmulti(dev, inc); - } - - /* Exclude state transition flags, already notified */ - changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); - if (changes) - rtmsg_ifinfo(RTM_NEWLINK, dev, changes); - - return ret; -} - -/** - * dev_set_mtu - Change maximum transfer unit - * @dev: device - * @new_mtu: new transfer unit - * - * Change the maximum transfer size of the network device. - */ -int dev_set_mtu(struct net_device *dev, int new_mtu) -{ - const struct net_device_ops *ops = dev->netdev_ops; - int err; - - if (new_mtu == dev->mtu) - return 0; - - /* MTU must be positive. 
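dev_change_flags() above is what "ifconfig eth0 up" ultimately reaches through SIOCSIFFLAGS; userspace performs a get/modify/set round trip (illustrative fragment):

    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
    if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0) {
            ifr.ifr_flags |= IFF_UP;        /* asks the kernel to dev_open() */
            ioctl(fd, SIOCSIFFLAGS, &ifr);
    }
    close(fd);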
*/ - if (new_mtu < 0) - return -EINVAL; - - if (!netif_device_present(dev)) - return -ENODEV; - - err = 0; - if (ops->ndo_change_mtu) - err = ops->ndo_change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; - - if (!err && dev->flags & IFF_UP) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); - return err; -} - -/** - * dev_set_mac_address - Change Media Access Control Address - * @dev: device - * @sa: new address - * - * Change the hardware (MAC) address of the device - */ -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) -{ - const struct net_device_ops *ops = dev->netdev_ops; - int err; - - if (!ops->ndo_set_mac_address) - return -EOPNOTSUPP; - if (sa->sa_family != dev->type) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - err = ops->ndo_set_mac_address(dev, sa); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) - */ -static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = dev_get_flags(dev); - return 0; - - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; - - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; - - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - - case SIOCGIFSLAVE: - err = -EINVAL; - break; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -EINVAL; - break; - - } - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside rtnl_lock() - */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops; - - if (!dev) - return -ENODEV; - - ops = dev->netdev_ops; - - switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case 
SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); - - case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, - dev->addr_len, 1); - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); - - /* - * Unknown or private ioctl - */ - - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; - - } - return err; -} - -/* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. - */ - -/** - * dev_ioctl - network device ioctl - * @net: the applicable net namespace - * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space - * - * Issue ioctl functions to devices. This is normally called by the - * user space syscall interfaces but can sometimes be useful for - * other purposes. The return value is the return from the syscall if - * positive or a negative errno code on error. - */ - -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) -{ - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } - if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; - - /* - * See which interface the caller is talking about. - */ - - switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - read_lock(&dev_base_lock); - ret = dev_ifsioc_locked(net, &ifr, cmd); - read_unlock(&dev_base_lock); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. 
- * - require strict serialization. - * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFMAP: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -EINVAL; - - /* - * Unknown or private ioctl. - */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -EINVAL; - } -} - - -/** - * dev_new_index - allocate an ifindex - * @net: the applicable net namespace - * - * Returns a suitable unique value for a new device interface - * number. The caller must hold the rtnl semaphore or the - * dev_base_lock to be sure it remains unique. - */ -static int dev_new_index(struct net *net) -{ - static int ifindex; - for (;;) { - if (++ifindex <= 0) - ifindex = 1; - if (!__dev_get_by_index(net, ifindex)) - return ifindex; - } -} - -/* Delayed registration/unregisteration */ -static LIST_HEAD(net_todo_list); - -static void net_set_todo(struct net_device *dev) -{ - list_add_tail(&dev->todo_list, &net_todo_list); -} - -static void rollback_registered(struct net_device *dev) -{ - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); - - WARN_ON(1); - return; - } - - BUG_ON(dev->reg_state != NETREG_REGISTERED); - - /* If device is running, close it first. */ - dev_close(dev); - - /* And unlink it from device chain. */ - unlist_netdevice(dev); - - dev->reg_state = NETREG_UNREGISTERING; - - synchronize_net(); - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. 
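The NETDEV_UNREGISTER notification sent just below is how other subsystems learn that a device is going away; anything caching net_device pointers registers a handler along these lines (a sketch; in 2.6 the callback's ptr argument is the net_device itself):

    static int foo_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
    {
            struct net_device *dev = ptr;

            if (event == NETDEV_UNREGISTER)
                    foo_forget_device(dev); /* assumed: drop cached references */
            return NOTIFY_DONE;
    }

    static struct notifier_block foo_netdev_notifier = {
            .notifier_call = foo_netdev_event,
    };

    /* installed once via register_netdevice_notifier(&foo_netdev_notifier) */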
- */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - /* - * Flush the unicast and multicast chains - */ - dev_addr_discard(dev); - - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); - - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); - - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); - - synchronize_net(); - - dev_put(dev); -} - -static void __netdev_init_queue_locks_one(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_unused) -{ - spin_lock_init(&dev_queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); - dev_queue->xmit_lock_owner = -1; -} - -static void netdev_init_queue_locks(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); - __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); -} - -unsigned long netdev_fix_features(unsigned long features, const char *name) -{ - /* Fix illegal SG+CSUM combinations. */ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); - features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. */ - if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); - features &= ~NETIF_F_TSO; - } - - if (features & NETIF_F_UFO) { - if (!(features & NETIF_F_GEN_CSUM)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_HW_CSUM feature.\n", - name); - features &= ~NETIF_F_UFO; - } - - if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); - features &= ~NETIF_F_UFO; - } - } - - return features; -} -EXPORT_SYMBOL(netdev_fix_features); - -/* Some devices need to (re-)set their netdev_ops inside - * ->init() or similar. If that happens, we have to setup - * the compat pointers again. - */ -void netdev_resync_ops(struct net_device *dev) -{ -#ifdef CONFIG_COMPAT_NET_DEV_OPS - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->neigh_setup = ops->ndo_neigh_setup; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif -#endif -} -EXPORT_SYMBOL(netdev_resync_ops); - -/** - * register_netdevice - register a network device - * @dev: device to register - * - * Take a completed network device structure and add it to the kernel - * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier - * chain. 0 is returned on success. A negative errno code is returned - * on a failure to set up the device, or if the name is a duplicate. - * - * Callers must hold the rtnl semaphore. You may want - * register_netdev() instead of this. 
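netdev_fix_features() above gives drivers one central place to sanity-check a feature mask before advertising it, e.g. (illustrative):

    /* drop illegal combinations (SG without checksum, TSO without SG, ...) */
    dev->features = netdev_fix_features(dev->features | NETIF_F_SG |
                                        NETIF_F_TSO, dev->name);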
- * - * BUGS: - * The locking appears insufficient to guarantee two parallel registers - * will not get the same name. - */ - -int register_netdevice(struct net_device *dev) -{ - struct hlist_head *head; - struct hlist_node *p; - int ret; - struct net *net = dev_net(dev); - - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - might_sleep(); - - /* When net_device's are persistent, this will be fatal. */ - BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!net); - - spin_lock_init(&dev->addr_list_lock); - netdev_set_addr_lockdep_class(dev); - netdev_init_queue_locks(dev); - - dev->iflink = -1; - -#ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatibility support. - * This is temporary until all network devices are converted. - */ - if (dev->netdev_ops) { - netdev_resync_ops(dev); - } else { - char drivername[64]; - pr_info("%s (%s): not using net_device_ops yet\n", - dev->name, netdev_drivername(dev, drivername, 64)); - - /* This works only because net_device_ops and the - compatibility structure are the same. */ - dev->netdev_ops = (void *) &(dev->init); - } -#endif - - /* Init, if this function is available */ - if (dev->netdev_ops->ndo_init) { - ret = dev->netdev_ops->ndo_init(dev); - if (ret) { - if (ret > 0) - ret = -EIO; - goto out; - } - } - - if (!dev_valid_name(dev->name)) { - ret = -EINVAL; - goto err_uninit; - } - - dev->ifindex = dev_new_index(net); - if (dev->iflink == -1) - dev->iflink = dev->ifindex; - - /* Check for existence of name */ - head = dev_name_hash(net, dev->name); - hlist_for_each(p, head) { - struct net_device *d - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(d->name, dev->name, IFNAMSIZ)) { - ret = -EEXIST; - goto err_uninit; - } - } - - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } - - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - - dev->features = netdev_fix_features(dev->features, dev->name); - - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; - - netdev_initialize_kobject(dev); - ret = netdev_register_kobject(dev); - if (ret) - goto err_uninit; - dev->reg_state = NETREG_REGISTERED; - - /* - * Default initial state at registration is that the - * device is present. - */ - - set_bit(__LINK_STATE_PRESENT, &dev->state); - - dev_init_scheduler(dev); - dev_hold(dev); - list_netdevice(dev); - - /* Notify protocols that a new device appeared. */ - ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); - ret = notifier_to_errno(ret); - if (ret) { - rollback_registered(dev); - dev->reg_state = NETREG_UNREGISTERED; - } - -out: - return ret; - -err_uninit: - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); - goto out; -} - -/** - * init_dummy_netdev - init a dummy network device for NAPI - * @dev: device to init - * - * This takes a network device structure and initializes the minimum - * amount of fields so it can be used to schedule NAPI polls without - * registering a full-blown interface.
This is to be used by drivers - * that need to tie several hardware interfaces to a single NAPI - * poll scheduler due to HW limitations. - */ -int init_dummy_netdev(struct net_device *dev) -{ - /* Clear everything. Note we don't initialize spinlocks - * as they aren't supposed to be taken by any of the - * NAPI code and this dummy netdev is supposed to be - * only ever used for NAPI polls - */ - memset(dev, 0, sizeof(struct net_device)); - - /* make sure we BUG if trying to hit standard - * register/unregister code path - */ - dev->reg_state = NETREG_DUMMY; - - /* initialize the ref count */ - atomic_set(&dev->refcnt, 1); - - /* NAPI wants this */ - INIT_LIST_HEAD(&dev->napi_list); - - /* a dummy interface is started by default */ - set_bit(__LINK_STATE_PRESENT, &dev->state); - set_bit(__LINK_STATE_START, &dev->state); - - return 0; -} -EXPORT_SYMBOL_GPL(init_dummy_netdev); - - -/** - * register_netdev - register a network device - * @dev: device to register - * - * Take a completed network device structure and add it to the kernel - * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier - * chain. 0 is returned on success. A negative errno code is returned - * on a failure to set up the device, or if the name is a duplicate. - * - * This is a wrapper around register_netdevice that takes the rtnl semaphore - * and expands the device name if you passed a format string to - * alloc_netdev. - */ -int register_netdev(struct net_device *dev) -{ - int err; - - rtnl_lock(); - - /* - * If the name is a format string the caller wants us to do a - * name allocation. - */ - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto out; - } - - err = register_netdevice(dev); -out: - rtnl_unlock(); - return err; -} -EXPORT_SYMBOL(register_netdev); - -/* - * netdev_wait_allrefs - wait until all references are gone. - * - * This is called when unregistering network devices. - * - * Any protocol or device that holds a reference should register - * for netdevice notification, and clean up and put back the - * reference if they receive an UNREGISTER event. - * We can get stuck here if buggy protocols don't correctly - * call dev_put. - */ -static void netdev_wait_allrefs(struct net_device *dev) -{ - unsigned long rebroadcast_time, warning_time; - - rebroadcast_time = warning_time = jiffies; - while (atomic_read(&dev->refcnt) != 0) { - if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { - rtnl_lock(); - - /* Rebroadcast unregister notification */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - if (test_bit(__LINK_STATE_LINKWATCH_PENDING, - &dev->state)) { - /* We must not have linkwatch events - * pending on unregister. If this - * happens, we simply run the queue - * unscheduled, resulting in a noop - * for this device. - */ - linkwatch_run_queue(); - } - - __rtnl_unlock(); - - rebroadcast_time = jiffies; - } - - msleep(250); - - if (time_after(jiffies, warning_time + 10 * HZ)) { - printk(KERN_EMERG "unregister_netdevice: " - "waiting for %s to become free. Usage " - "count = %d\n", - dev->name, atomic_read(&dev->refcnt)); - warning_time = jiffies; - } - } -} - -/* The sequence is: - * - * rtnl_lock(); - * ... - * register_netdevice(x1); - * register_netdevice(x2); - * ... - * unregister_netdevice(y1); - * unregister_netdevice(y2); - * ... - * rtnl_unlock(); - * free_netdev(y1); - * free_netdev(y2); - * - * We are invoked by rtnl_unlock().
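The usual driver-side sequence around register_netdev() above, sketched for a hypothetical foo driver (alloc_etherdev() is the Ethernet convenience wrapper around the allocator later in this file; the ops table is assumed):

    dev = alloc_etherdev(sizeof(struct foo_priv));
    if (!dev)
            return -ENOMEM;
    dev->netdev_ops = &foo_netdev_ops;      /* assumed ops table */

    err = register_netdev(dev);             /* takes the rtnl lock itself */
    if (err)
            free_netdev(dev);
    return err;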
- * This allows us to deal with problems: - * 1) We can delete sysfs objects which invoke hotplug - * without deadlocking with linkwatch via keventd. - * 2) Since we run with the RTNL semaphore not held, we can sleep - * safely in order to wait for the netdev refcnt to drop to zero. - * - * We must not return until all unregister events added during - * the interval the lock was held have been completed. - */ -void netdev_run_todo(void) -{ - struct list_head list; - - /* Snapshot list, allow later requests */ - list_replace_init(&net_todo_list, &list); - - __rtnl_unlock(); - - while (!list_empty(&list)) { - struct net_device *dev - = list_entry(list.next, struct net_device, todo_list); - list_del(&dev->todo_list); - - if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { - printk(KERN_ERR "network todo '%s' but state %d\n", - dev->name, dev->reg_state); - dump_stack(); - continue; - } - - dev->reg_state = NETREG_UNREGISTERED; - - on_each_cpu(flush_backlog, dev, 1); - - netdev_wait_allrefs(dev); - - /* paranoia */ - BUG_ON(atomic_read(&dev->refcnt)); - WARN_ON(dev->ip_ptr); - WARN_ON(dev->ip6_ptr); - WARN_ON(dev->dn_ptr); - - if (dev->destructor) - dev->destructor(dev); - - /* Free network device */ - kobject_put(&dev->dev.kobj); - } -} - -/** - * dev_get_stats - get network device statistics - * @dev: device to get statistics from - * - * Get network statistics from device. The device driver may provide - * its own method by setting dev->netdev_ops->get_stats; otherwise - * the internal statistics structure is used. - */ -const struct net_device_stats *dev_get_stats(struct net_device *dev) - { - const struct net_device_ops *ops = dev->netdev_ops; - - if (ops->ndo_get_stats) - return ops->ndo_get_stats(dev); - else - return &dev->stats; -} -EXPORT_SYMBOL(dev_get_stats); - -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - queue->dev = dev; -} - -static void netdev_init_queues(struct net_device *dev) -{ - netdev_init_one_queue(dev, &dev->rx_queue, NULL); - netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); - spin_lock_init(&dev->tx_global_lock); -} - -/** - * alloc_netdev_mq - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate - * - * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. 
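
A sketch of a multiqueue allocation through the interface documented above; the names are illustrative, and the final argument sets both num_tx_queues and real_num_tx_queues, which a driver may then lower.

static struct net_device *my_alloc_mq(void)
{
	/* four TX subqueues, ether_setup() as the setup callback */
	struct net_device *dev = alloc_netdev_mq(sizeof(struct my_priv),
						 "eth%d", ether_setup, 4);

	if (dev)
		dev->real_num_tx_queues = 2;	/* expose only two for now */
	return dev;
}
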
- */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) -{ - struct netdev_queue *tx; - struct net_device *dev; - size_t alloc_size; - void *p; - - BUG_ON(strlen(name) >= sizeof(dev->name)); - - alloc_size = sizeof(struct net_device); - if (sizeof_priv) { - /* ensure 32-byte alignment of private area */ - alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; - alloc_size += sizeof_priv; - } - /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN_CONST; - - p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); - return NULL; - } - - tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - printk(KERN_ERR "alloc_netdev: Unable to allocate " - "tx qdiscs.\n"); - kfree(p); - return NULL; - } - - dev = (struct net_device *) - (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); - dev->padded = (char *)dev - (char *)p; - dev_net_set(dev, &init_net); - - dev->_tx = tx; - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; - - dev->gso_max_size = GSO_MAX_SIZE; - - netdev_init_queues(dev); - - INIT_LIST_HEAD(&dev->napi_list); - setup(dev); - strcpy(dev->name, name); - return dev; -} -EXPORT_SYMBOL(alloc_netdev_mq); - -/** - * free_netdev - free network device - * @dev: device - * - * This function does the last stage of destroying an allocated device - * interface. The reference to the device object is released. - * If this is the last reference then it will be freed. - */ -void free_netdev(struct net_device *dev) -{ - struct napi_struct *p, *n; - - release_net(dev_net(dev)); - - kfree(dev->_tx); - - list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) - netif_napi_del(p); - - /* Compatibility with error handling in drivers */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); - return; - } - - BUG_ON(dev->reg_state != NETREG_UNREGISTERED); - dev->reg_state = NETREG_RELEASED; - - /* will free via device release */ - put_device(&dev->dev); -} - -/** - * synchronize_net - Synchronize with packet receive processing - * - * Wait for packets currently being received to be done. - * Does not block later packets from starting. - */ -void synchronize_net(void) -{ - might_sleep(); -#ifndef DDE_LINUX - synchronize_rcu(); -#endif -} - -/** - * unregister_netdevice - remove device from the kernel - * @dev: device - * - * This function shuts down a device interface and removes it - * from the kernel tables. - * - * Callers must hold the rtnl semaphore. You may want - * unregister_netdev() instead of this. - */ - -void unregister_netdevice(struct net_device *dev) -{ - ASSERT_RTNL(); - - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); -} - -/** - * unregister_netdev - remove device from the kernel - * @dev: device - * - * This function shuts down a device interface and removes it - * from the kernel tables. - * - * This is just a wrapper for unregister_netdevice that takes - * the rtnl semaphore. In general you want to use this and not - * unregister_netdevice. 
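
A sketch of the case this note is about: a caller tearing down several devices can hold the rtnl lock itself and use unregister_netdevice() directly; free_netdev() must wait until rtnl_unlock() has run the todo list, per the sequence described earlier.

static void my_remove_pair(struct net_device *a, struct net_device *b)
{
	rtnl_lock();
	unregister_netdevice(a);
	unregister_netdevice(b);
	rtnl_unlock();			/* invokes netdev_run_todo() */

	free_netdev(a);
	free_netdev(b);
}
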
- */
-void unregister_netdev(struct net_device *dev)
-{
-	rtnl_lock();
-	unregister_netdevice(dev);
-	rtnl_unlock();
-}
-
-EXPORT_SYMBOL(unregister_netdev);
-
-/**
- * dev_change_net_namespace - move device to different network namespace
- * @dev: device
- * @net: network namespace
- * @pat: if not NULL, name pattern to try if the current device name
- *       is already taken in the destination network namespace.
- *
- * This function shuts down a device interface and moves it
- * to a new network namespace. On success 0 is returned, on
- * a failure a negative errno code is returned.
- *
- * Callers must hold the rtnl semaphore.
- */
-
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
-{
-	char buf[IFNAMSIZ];
-	const char *destname;
-	int err;
-
-	ASSERT_RTNL();
-
-	/* Don't allow namespace local devices to be moved. */
-	err = -EINVAL;
-	if (dev->features & NETIF_F_NETNS_LOCAL)
-		goto out;
-
-#ifdef CONFIG_SYSFS
-	/* Don't allow real devices to be moved when sysfs
-	 * is enabled.
-	 */
-	err = -EINVAL;
-	if (dev->dev.parent)
-		goto out;
-#endif
-
-	/* Ensure the device has been registered */
-	err = -EINVAL;
-	if (dev->reg_state != NETREG_REGISTERED)
-		goto out;
-
-	/* Get out if there is nothing to do */
-	err = 0;
-	if (net_eq(dev_net(dev), net))
-		goto out;
-
-	/* Pick the destination device name, and ensure
-	 * we can use it in the destination network namespace.
-	 */
-	err = -EEXIST;
-	destname = dev->name;
-	if (__dev_get_by_name(net, destname)) {
-		/* We get here if we can't use the current device name */
-		if (!pat)
-			goto out;
-		if (!dev_valid_name(pat))
-			goto out;
-		if (strchr(pat, '%')) {
-			if (__dev_alloc_name(net, pat, buf) < 0)
-				goto out;
-			destname = buf;
-		} else
-			destname = pat;
-		if (__dev_get_by_name(net, destname))
-			goto out;
-	}
-
-	/*
-	 * And now a mini version of register_netdevice and unregister_netdevice.
-	 */
-
-	/* If device is running, close it first. */
-	dev_close(dev);
-
-	/* And unlink it from device chain */
-	err = -ENODEV;
-	unlist_netdevice(dev);
-
-	synchronize_net();
-
-	/* Shutdown queueing discipline. */
-	dev_shutdown(dev);
-
-	/* Notify protocols that we are about to destroy
-	   this device. They should clean all the things.
-	 */
-	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
-	/*
-	 * Flush the unicast and multicast chains
-	 */
-	dev_addr_discard(dev);
-
-	netdev_unregister_kobject(dev);
-
-	/* Actually switch the network namespace */
-	dev_net_set(dev, net);
-
-	/* Assign the new device name */
-	if (destname != dev->name)
-		strcpy(dev->name, destname);
-
-	/* If there is an ifindex conflict assign a new one */
-	if (__dev_get_by_index(net, dev->ifindex)) {
-		int iflink = (dev->iflink == dev->ifindex);
-		dev->ifindex = dev_new_index(net);
-		if (iflink)
-			dev->iflink = dev->ifindex;
-	}
-
-	/* Fixup kobjects */
-	err = netdev_register_kobject(dev);
-	WARN_ON(err);
-
-	/* Add the device back in the hashes */
-	list_netdevice(dev);
-
-	/* Notify protocols that a new device appeared.
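
A sketch of a caller of dev_change_net_namespace() above; target_net is a hypothetical namespace pointer, and "dev%d" is the fallback pattern used if the current name is already taken in the destination.

static int my_move_dev(struct net_device *dev, struct net *target_net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, target_net, "dev%d");
	rtnl_unlock();

	return err;
}
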
*/ - call_netdevice_notifiers(NETDEV_REGISTER, dev); - - synchronize_net(); - err = 0; -out: - return err; -} - -static int dev_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *ocpu) -{ - struct sk_buff **list_skb; - struct Qdisc **list_net; - struct sk_buff *skb; - unsigned int cpu, oldcpu = (unsigned long)ocpu; - struct softnet_data *sd, *oldsd; - - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) - return NOTIFY_OK; - - local_irq_disable(); - cpu = smp_processor_id(); - sd = &per_cpu(softnet_data, cpu); - oldsd = &per_cpu(softnet_data, oldcpu); - - /* Find end of our completion_queue. */ - list_skb = &sd->completion_queue; - while (*list_skb) - list_skb = &(*list_skb)->next; - /* Append completion queue from offline CPU. */ - *list_skb = oldsd->completion_queue; - oldsd->completion_queue = NULL; - - /* Find end of our output_queue. */ - list_net = &sd->output_queue; - while (*list_net) - list_net = &(*list_net)->next_sched; - /* Append output queue from offline CPU. */ - *list_net = oldsd->output_queue; - oldsd->output_queue = NULL; - - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_enable(); - - /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) - netif_rx(skb); - - return NOTIFY_OK; -} - - -/** - * netdev_increment_features - increment feature set by one - * @all: current feature set - * @one: new feature set - * @mask: mask feature set - * - * Computes a new feature set after adding a device with feature set - * @one to the master device with current feature set @all. Will not - * enable anything that is off in @mask. Returns the new feature set. - */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) -{ - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. */ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } - - one |= NETIF_F_ALL_CSUM; - - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; - - return all; -} -EXPORT_SYMBOL(netdev_increment_features); - -static struct hlist_head *netdev_create_hash(void) -{ - int i; - struct hlist_head *hash; - - hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); - if (hash != NULL) - for (i = 0; i < NETDEV_HASHENTRIES; i++) - INIT_HLIST_HEAD(&hash[i]); - - return hash; -} - -/* Initialize per network namespace state */ -static int __net_init netdev_init(struct net *net) -{ - INIT_LIST_HEAD(&net->dev_base_head); - - net->dev_name_head = netdev_create_hash(); - if (net->dev_name_head == NULL) - goto err_name; - - net->dev_index_head = netdev_create_hash(); - if (net->dev_index_head == NULL) - goto err_idx; - - return 0; - -err_idx: - kfree(net->dev_name_head); -err_name: - return -ENOMEM; -} - -/** - * netdev_drivername - network driver for the device - * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer - * - * Determine network driver for device. 
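
A sketch of the intended calling pattern for netdev_increment_features() above, modelled on how a bonding-style master folds its slaves' feature sets; the array iteration and names are illustrative.

static void my_compute_features(struct net_device *master,
				struct net_device *slave[], int n)
{
	unsigned long features = master->features & ~NETIF_F_ONE_FOR_ALL;
	int i;

	for (i = 0; i < n; i++)
		features = netdev_increment_features(features,
						     slave[i]->features,
						     NETIF_F_ONE_FOR_ALL);

	master->features = netdev_fix_features(features, master->name);
}
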
- */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) -{ - const struct device_driver *driver; - const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; - - parent = dev->dev.parent; - - if (!parent) - return buffer; - - driver = parent->driver; - if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; -} - -static void __net_exit netdev_exit(struct net *net) -{ - kfree(net->dev_name_head); - kfree(net->dev_index_head); -} - -static struct pernet_operations __net_initdata netdev_net_ops = { - .init = netdev_init, - .exit = netdev_exit, -}; - -static void __net_exit default_device_exit(struct net *net) -{ - struct net_device *dev; - /* - * Push all migratable of the network devices back to the - * initial network namespace - */ - rtnl_lock(); -restart: - for_each_netdev(net, dev) { - int err; - char fb_name[IFNAMSIZ]; - - /* Ignore unmoveable devices (i.e. loopback) */ - if (dev->features & NETIF_F_NETNS_LOCAL) - continue; - - /* Delete virtual devices */ - if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev); - goto restart; - } - - /* Push remaing network devices to init_net */ - snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); - err = dev_change_net_namespace(dev, &init_net, fb_name); - if (err) { - printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", - __func__, dev->name, err); - BUG(); - } - goto restart; - } - rtnl_unlock(); -} - -static struct pernet_operations __net_initdata default_device_ops = { - .exit = default_device_exit, -}; - -/* - * Initialize the DEV module. At boot time this walks the device list and - * unhooks any devices that fail to initialise (normally hardware not - * present) and leaves us with a valid list of present and active devices. - * - */ - -/* - * This is called single threaded during boot, so no need - * to take the rtnl semaphore. - */ -static int __init net_dev_init(void) -{ - int i, rc = -ENOMEM; - - BUG_ON(!dev_boot_phase); - - if (dev_proc_init()) - goto out; - - if (netdev_kobject_init()) - goto out; - - INIT_LIST_HEAD(&ptype_all); - for (i = 0; i < PTYPE_HASH_SIZE; i++) - INIT_LIST_HEAD(&ptype_base[i]); - - if (register_pernet_subsys(&netdev_net_ops)) - goto out; - - /* - * Initialise the packet receive queues. - */ - - for_each_possible_cpu(i) { - struct softnet_data *queue; - - queue = &per_cpu(softnet_data, i); - skb_queue_head_init(&queue->input_pkt_queue); - queue->completion_queue = NULL; - INIT_LIST_HEAD(&queue->poll_list); - - queue->backlog.poll = process_backlog; - queue->backlog.weight = weight_p; - queue->backlog.gro_list = NULL; - } - - dev_boot_phase = 0; - - /* The loopback device is special if any other network devices - * is present in a network namespace the loopback device must - * be present. Since we now dynamically allocate and free the - * loopback device ensure this invariant is maintained by - * keeping the loopback device as the first device on the - * list of network devices. Ensuring the loopback devices - * is the first device that appears and the last network device - * that disappears. 
- */ -#ifndef DDE_LINUX - if (register_pernet_device(&loopback_net_ops)) - goto out; -#endif - - if (register_pernet_device(&default_device_ops)) - goto out; - - open_softirq(NET_TX_SOFTIRQ, net_tx_action); - open_softirq(NET_RX_SOFTIRQ, net_rx_action); - - hotcpu_notifier(dev_cpu_callback, 0); -#ifndef DDE_LINUX - dst_init(); -#endif - dev_mcast_init(); - rc = 0; -out: - return rc; -} - -subsys_initcall(net_dev_init); - -EXPORT_SYMBOL(__dev_get_by_index); -EXPORT_SYMBOL(__dev_get_by_name); -EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(dev_valid_name); -EXPORT_SYMBOL(dev_add_pack); -EXPORT_SYMBOL(dev_alloc_name); -EXPORT_SYMBOL(dev_close); -EXPORT_SYMBOL(dev_get_by_flags); -EXPORT_SYMBOL(dev_get_by_index); -EXPORT_SYMBOL(dev_get_by_name); -EXPORT_SYMBOL(dev_open); -EXPORT_SYMBOL(dev_queue_xmit); -EXPORT_SYMBOL(dev_remove_pack); -EXPORT_SYMBOL(dev_set_allmulti); -EXPORT_SYMBOL(dev_set_promiscuity); -EXPORT_SYMBOL(dev_change_flags); -EXPORT_SYMBOL(dev_set_mtu); -EXPORT_SYMBOL(dev_set_mac_address); -EXPORT_SYMBOL(free_netdev); -EXPORT_SYMBOL(netdev_boot_setup_check); -EXPORT_SYMBOL(netdev_set_master); -EXPORT_SYMBOL(netdev_state_change); -EXPORT_SYMBOL(netif_receive_skb); -EXPORT_SYMBOL(netif_rx); -EXPORT_SYMBOL(register_gifconf); -EXPORT_SYMBOL(register_netdevice); -EXPORT_SYMBOL(register_netdevice_notifier); -EXPORT_SYMBOL(skb_checksum_help); -EXPORT_SYMBOL(synchronize_net); -EXPORT_SYMBOL(unregister_netdevice); -EXPORT_SYMBOL(unregister_netdevice_notifier); -EXPORT_SYMBOL(net_enable_timestamp); -EXPORT_SYMBOL(net_disable_timestamp); -EXPORT_SYMBOL(dev_get_flags); - -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -EXPORT_SYMBOL(br_handle_frame_hook); -EXPORT_SYMBOL(br_fdb_get_hook); -EXPORT_SYMBOL(br_fdb_put_hook); -#endif - -#ifdef CONFIG_KMOD -EXPORT_SYMBOL(dev_load); -#endif - -EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git a/libdde_linux26/lib/src/net/core/link_watch.c b/libdde_linux26/lib/src/net/core/link_watch.c deleted file mode 100644 index 1afdb815..00000000 --- a/libdde_linux26/lib/src/net/core/link_watch.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Linux network device link state notification - * - * Author: - * Stefan Rompf <sux@loplof.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/if.h> -#include <net/sock.h> -#include <net/pkt_sched.h> -#include <linux/rtnetlink.h> -#include <linux/jiffies.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/bitops.h> -#include <asm/types.h> - - -enum lw_bits { - LW_URGENT = 0, -}; - -static unsigned long linkwatch_flags; -static unsigned long linkwatch_nextevent; - -static void linkwatch_event(struct work_struct *dummy); -static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); - -static struct net_device *lweventlist; -static DEFINE_SPINLOCK(lweventlist_lock); - -static unsigned char default_operstate(const struct net_device *dev) -{ -#ifndef DDE_LINUX - if (!netif_carrier_ok(dev)) - return (dev->ifindex != dev->iflink ? 
- IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN); - - if (netif_dormant(dev)) - return IF_OPER_DORMANT; -#endif - - return IF_OPER_UP; -} - - -static void rfc2863_policy(struct net_device *dev) -{ -#ifndef DDE_LINUX - unsigned char operstate = default_operstate(dev); - - if (operstate == dev->operstate) - return; - - write_lock_bh(&dev_base_lock); - - switch(dev->link_mode) { - case IF_LINK_MODE_DORMANT: - if (operstate == IF_OPER_UP) - operstate = IF_OPER_DORMANT; - break; - - case IF_LINK_MODE_DEFAULT: - default: - break; - } - - dev->operstate = operstate; - - write_unlock_bh(&dev_base_lock); -#endif -} - - -static bool linkwatch_urgent_event(struct net_device *dev) -{ - return netif_running(dev) && netif_carrier_ok(dev) && - qdisc_tx_changing(dev); -} - - -static void linkwatch_add_event(struct net_device *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&lweventlist_lock, flags); - dev->link_watch_next = lweventlist; - lweventlist = dev; - spin_unlock_irqrestore(&lweventlist_lock, flags); -} - - -static void linkwatch_schedule_work(int urgent) -{ - unsigned long delay = linkwatch_nextevent - jiffies; - - if (test_bit(LW_URGENT, &linkwatch_flags)) - return; - - /* Minimise down-time: drop delay for up event. */ - if (urgent) { - if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) - return; - delay = 0; - } - - /* If we wrap around we'll delay it by at most HZ. */ - if (delay > HZ) - delay = 0; - - /* - * This is true if we've scheduled it immeditately or if we don't - * need an immediate execution and it's already pending. - */ - if (schedule_delayed_work(&linkwatch_work, delay) == !delay) - return; - - /* Don't bother if there is nothing urgent. */ - if (!test_bit(LW_URGENT, &linkwatch_flags)) - return; - - /* It's already running which is good enough. */ - if (!cancel_delayed_work(&linkwatch_work)) - return; - - /* Otherwise we reschedule it again for immediate exection. */ - schedule_delayed_work(&linkwatch_work, 0); -} - - -static void __linkwatch_run_queue(int urgent_only) -{ -#ifndef DDE_LINUX - struct net_device *next; - - /* - * Limit the number of linkwatch events to one - * per second so that a runaway driver does not - * cause a storm of messages on the netlink - * socket. This limit does not apply to up events - * while the device qdisc is down. - */ - if (!urgent_only) - linkwatch_nextevent = jiffies + HZ; - /* Limit wrap-around effect on delay. */ - else if (time_after(linkwatch_nextevent, jiffies + HZ)) - linkwatch_nextevent = jiffies; - - clear_bit(LW_URGENT, &linkwatch_flags); - - spin_lock_irq(&lweventlist_lock); - next = lweventlist; - lweventlist = NULL; - spin_unlock_irq(&lweventlist_lock); - - while (next) { - struct net_device *dev = next; - - next = dev->link_watch_next; - - if (urgent_only && !linkwatch_urgent_event(dev)) { - linkwatch_add_event(dev); - continue; - } - - /* - * Make sure the above read is complete since it can be - * rewritten as soon as we clear the bit below. 
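
For context, a sketch of how events enter this queue: a driver's link-change path calls netif_carrier_on()/netif_carrier_off(), which fire linkwatch_fire_event() (below) and thereby reach linkwatch_add_event() and linkwatch_schedule_work() above; my_link_change() is an illustrative callback.

static void my_link_change(struct net_device *dev, int link_up)
{
	if (link_up)
		netif_carrier_on(dev);	/* both paths fire linkwatch_fire_event() */
	else
		netif_carrier_off(dev);
}
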
- */ - smp_mb__before_clear_bit(); - - /* We are about to handle this device, - * so new events can be accepted - */ - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - - rfc2863_policy(dev); - if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) - dev_activate(dev); - else - dev_deactivate(dev); - - netdev_state_change(dev); - } - - dev_put(dev); - } - - if (lweventlist) - linkwatch_schedule_work(0); -#endif -} - - -/* Must be called with the rtnl semaphore held */ -void linkwatch_run_queue(void) -{ - __linkwatch_run_queue(0); -} - - -static void linkwatch_event(struct work_struct *dummy) -{ -#ifndef DDE_LINUX - rtnl_lock(); - __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); - rtnl_unlock(); -#endif -} - - -void linkwatch_fire_event(struct net_device *dev) -{ -#ifndef DDE_LINUX - bool urgent = linkwatch_urgent_event(dev); - - if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - dev_hold(dev); - - linkwatch_add_event(dev); - } else if (!urgent) - return; - - linkwatch_schedule_work(urgent); -#endif -} - -EXPORT_SYMBOL(linkwatch_fire_event); diff --git a/libdde_linux26/lib/src/net/core/net_namespace.c b/libdde_linux26/lib/src/net/core/net_namespace.c deleted file mode 100644 index ab5a0a7f..00000000 --- a/libdde_linux26/lib/src/net/core/net_namespace.c +++ /dev/null @@ -1,511 +0,0 @@ -#include <linux/workqueue.h> -#include <linux/rtnetlink.h> -#include <linux/cache.h> -#include <linux/slab.h> -#include <linux/list.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/idr.h> -#include <net/net_namespace.h> -#include <net/netns/generic.h> - -/* - * Our network namespace constructor/destructor lists - */ - -static LIST_HEAD(pernet_list); -static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); - -LIST_HEAD(net_namespace_list); -EXPORT_SYMBOL_GPL(net_namespace_list); - -struct net init_net; -EXPORT_SYMBOL(init_net); - -#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ - -/* - * setup_net runs the initializers for the network namespace object. - */ -static __net_init int setup_net(struct net *net) -{ - /* Must be called with net_mutex held */ - struct pernet_operations *ops; - int error = 0; - - atomic_set(&net->count, 1); - -#ifdef NETNS_REFCNT_DEBUG - atomic_set(&net->use_count, 0); -#endif - - list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } - } -out: - return error; - -out_undo: - /* Walk through the list backwards calling the exit functions - * for the pernet modules whose init functions did not fail. 
- */ - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } - -#ifndef DDE_LINUX - rcu_barrier(); -#endif - goto out; -} - -static struct net_generic *net_alloc_generic(void) -{ - struct net_generic *ng; - size_t generic_size = sizeof(struct net_generic) + - INITIAL_NET_GEN_PTRS * sizeof(void *); - - ng = kzalloc(generic_size, GFP_KERNEL); - if (ng) - ng->len = INITIAL_NET_GEN_PTRS; - - return ng; -} - -#ifdef CONFIG_NET_NS -static struct kmem_cache *net_cachep; -static struct workqueue_struct *netns_wq; - -static struct net *net_alloc(void) -{ - struct net *net = NULL; - struct net_generic *ng; - - ng = net_alloc_generic(); - if (!ng) - goto out; - - net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); - if (!net) - goto out_free; - - rcu_assign_pointer(net->gen, ng); -out: - return net; - -out_free: - kfree(ng); - goto out; -} - -static void net_free(struct net *net) -{ -#ifdef NETNS_REFCNT_DEBUG - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } -#endif - kfree(net->gen); - kmem_cache_free(net_cachep, net); -} - -struct net *copy_net_ns(unsigned long flags, struct net *old_net) -{ - struct net *new_net = NULL; - int err; - - get_net(old_net); - - if (!(flags & CLONE_NEWNET)) - return old_net; - - err = -ENOMEM; - new_net = net_alloc(); - if (!new_net) - goto out_err; - - mutex_lock(&net_mutex); - err = setup_net(new_net); - if (!err) { - rtnl_lock(); - list_add_tail(&new_net->list, &net_namespace_list); - rtnl_unlock(); - } - mutex_unlock(&net_mutex); - - if (err) - goto out_free; -out: - put_net(old_net); - return new_net; - -out_free: - net_free(new_net); -out_err: - new_net = ERR_PTR(err); - goto out; -} - -static void cleanup_net(struct work_struct *work) -{ - struct pernet_operations *ops; - struct net *net; - - net = container_of(work, struct net, work); - - mutex_lock(&net_mutex); - - /* Don't let anyone else find us. */ - rtnl_lock(); - list_del(&net->list); - rtnl_unlock(); - - /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } - - mutex_unlock(&net_mutex); - - /* Ensure there are no outstanding rcu callbacks using this - * network namespace. 
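
A sketch of the reference discipline that keeps cleanup_net() from running under a user: code that caches a namespace pointer pins it with get_net() and drops it with put_net(), whose final put lands in __put_net() below; struct my_ctx is illustrative.

struct my_ctx {
	struct net *net;
};

static void my_ctx_bind(struct my_ctx *ctx, struct net *net)
{
	ctx->net = get_net(net);	/* pins the namespace */
}

static void my_ctx_release(struct my_ctx *ctx)
{
	put_net(ctx->net);		/* last put schedules cleanup_net() */
}
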
- */ - rcu_barrier(); - - /* Finally it is safe to free my network namespace structure */ - net_free(net); -} - -void __put_net(struct net *net) -{ - /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - queue_work(netns_wq, &net->work); -} -EXPORT_SYMBOL_GPL(__put_net); - -#else -struct net *copy_net_ns(unsigned long flags, struct net *old_net) -{ - if (flags & CLONE_NEWNET) - return ERR_PTR(-EINVAL); - return old_net; -} -#endif - -static int __init net_ns_init(void) -{ - struct net_generic *ng; - int err; - - printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); -#ifdef CONFIG_NET_NS - net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), - SMP_CACHE_BYTES, - SLAB_PANIC, NULL); - - /* Create workqueue for cleanup */ - netns_wq = create_singlethread_workqueue("netns"); - if (!netns_wq) - panic("Could not create netns workq"); -#endif - - ng = net_alloc_generic(); - if (!ng) - panic("Could not allocate generic netns"); - - rcu_assign_pointer(init_net.gen, ng); - - mutex_lock(&net_mutex); - err = setup_net(&init_net); - - rtnl_lock(); - list_add_tail(&init_net.list, &net_namespace_list); - rtnl_unlock(); - - mutex_unlock(&net_mutex); - if (err) - panic("Could not setup the initial network namespace"); - - return 0; -} - -pure_initcall(net_ns_init); - -#ifdef CONFIG_NET_NS -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) -{ - struct net *net, *undo_net; - int error; - - list_add_tail(&ops->list, list); - if (ops->init) { - for_each_net(net) { - error = ops->init(net); - if (error) - goto out_undo; - } - } - return 0; - -out_undo: - /* If I have an error cleanup all namespaces I initialized */ - list_del(&ops->list); - if (ops->exit) { - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - ops->exit(undo_net); - } - } -undone: - return error; -} - -static void unregister_pernet_operations(struct pernet_operations *ops) -{ - struct net *net; - - list_del(&ops->list); - if (ops->exit) - for_each_net(net) - ops->exit(net); -} - -#else - -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) -{ - if (ops->init == NULL) - return 0; - return ops->init(&init_net); -} - -static void unregister_pernet_operations(struct pernet_operations *ops) -{ - if (ops->exit) - ops->exit(&init_net); -} -#endif - -static DEFINE_IDA(net_generic_ids); - -/** - * register_pernet_subsys - register a network namespace subsystem - * @ops: pernet operations structure for the subsystem - * - * Register a subsystem which has init and exit functions - * that are called when network namespaces are created and - * destroyed respectively. - * - * When registered all network namespace init functions are - * called for every existing network namespace. Allowing kernel - * modules to have a race free view of the set of network namespaces. - * - * When a new network namespace is created all of the init - * methods are called in the order in which they were registered. - * - * When a network namespace is destroyed all of the exit methods - * are called in the reverse of the order with which they were - * registered. 
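
A sketch of a subsystem using the interface documented above; my_net_init()/my_net_exit() and the per-namespace state they would manage are illustrative.

static int __net_init my_net_init(struct net *net)
{
	/* allocate and attach per-namespace state here */
	return 0;
}

static void __net_exit my_net_exit(struct net *net)
{
	/* tear down per-namespace state here */
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.exit = my_net_exit,
};

static int __init my_subsys_init(void)
{
	return register_pernet_subsys(&my_net_ops);
}
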
- */ -int register_pernet_subsys(struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); - error = register_pernet_operations(first_device, ops); - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_subsys); - -/** - * unregister_pernet_subsys - unregister a network namespace subsystem - * @ops: pernet operations structure to manipulate - * - * Remove the pernet operations structure from the list to be - * used when network namespaces are created or destroyed. In - * addition run the exit method for all existing network - * namespaces. - */ -void unregister_pernet_subsys(struct pernet_operations *module) -{ - mutex_lock(&net_mutex); - unregister_pernet_operations(module); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_subsys); - -int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) -{ - int rv; - - mutex_lock(&net_mutex); -again: - rv = ida_get_new_above(&net_generic_ids, 1, id); - if (rv < 0) { - if (rv == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - rv = register_pernet_operations(first_device, ops); - if (rv < 0) - ida_remove(&net_generic_ids, *id); -out: - mutex_unlock(&net_mutex); - return rv; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); - -void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); - -/** - * register_pernet_device - register a network namespace device - * @ops: pernet operations structure for the subsystem - * - * Register a device which has init and exit functions - * that are called when network namespaces are created and - * destroyed respectively. - * - * When registered all network namespace init functions are - * called for every existing network namespace. Allowing kernel - * modules to have a race free view of the set of network namespaces. - * - * When a new network namespace is created all of the init - * methods are called in the order in which they were registered. - * - * When a network namespace is destroyed all of the exit methods - * are called in the reverse of the order with which they were - * registered. - */ -int register_pernet_device(struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); - error = register_pernet_operations(&pernet_list, ops); - if (!error && (first_device == &pernet_list)) - first_device = &ops->list; - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_device); - -int register_pernet_gen_device(int *id, struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); -again: - error = ida_get_new_above(&net_generic_ids, 1, id); - if (error) { - if (error == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - error = register_pernet_operations(&pernet_list, ops); - if (error) - ida_remove(&net_generic_ids, *id); - else if (first_device == &pernet_list) - first_device = &ops->list; -out: - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_device); - -/** - * unregister_pernet_device - unregister a network namespace netdevice - * @ops: pernet operations structure to manipulate - * - * Remove the pernet operations structure from the list to be - * used when network namespaces are created or destroyed. 
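
A sketch of the generic-pointer variant above: register_pernet_gen_subsys() hands back an id, and the init hook (called with net_mutex held, as net_assign_generic() requires) stores per-namespace data under it. struct my_data and the names are illustrative; lookup is assumed to go through net_generic() from <net/netns/generic.h>.

struct my_data {
	int counter;			/* illustrative per-namespace state */
};

static int my_id;

static int __net_init my_gen_init(struct net *net)
{
	struct my_data *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return -ENOMEM;
	return net_assign_generic(net, my_id, d);
}

static void __net_exit my_gen_exit(struct net *net)
{
	kfree(net_generic(net, my_id));
}

static struct pernet_operations my_gen_ops = {
	.init = my_gen_init,
	.exit = my_gen_exit,
};

static int __init my_gen_module_init(void)
{
	return register_pernet_gen_subsys(&my_id, &my_gen_ops);
}
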
In - * addition run the exit method for all existing network - * namespaces. - */ -void unregister_pernet_device(struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - if (&ops->list == first_device) - first_device = first_device->next; - unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_device); - -void unregister_pernet_gen_device(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - if (&ops->list == first_device) - first_device = first_device->next; - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_device); - -static void net_generic_release(struct rcu_head *rcu) -{ - struct net_generic *ng; - - ng = container_of(rcu, struct net_generic, rcu); - kfree(ng); -} - -int net_assign_generic(struct net *net, int id, void *data) -{ - struct net_generic *ng, *old_ng; - - BUG_ON(!mutex_is_locked(&net_mutex)); - BUG_ON(id == 0); - - ng = old_ng = net->gen; - if (old_ng->len >= id) - goto assign; - - ng = kzalloc(sizeof(struct net_generic) + - id * sizeof(void *), GFP_KERNEL); - if (ng == NULL) - return -ENOMEM; - - /* - * Some synchronisation notes: - * - * The net_generic explores the net->gen array inside rcu - * read section. Besides once set the net->gen->ptr[x] - * pointer never changes (see rules in netns/generic.h). - * - * That said, we simply duplicate this array and schedule - * the old copy for kfree after a grace period. - */ - - ng->len = id; - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); - - rcu_assign_pointer(net->gen, ng); - call_rcu(&old_ng->rcu, net_generic_release); -assign: - ng->ptr[id - 1] = data; - return 0; -} -EXPORT_SYMBOL_GPL(net_assign_generic); diff --git a/libdde_linux26/lib/src/net/core/rtnetlink.c b/libdde_linux26/lib/src/net/core/rtnetlink.c deleted file mode 100644 index 8408e3da..00000000 --- a/libdde_linux26/lib/src/net/core/rtnetlink.c +++ /dev/null @@ -1,1436 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Routing netlink socket interface: protocol independent part. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Vitaly E. Lavrov RTA_OK arithmetics was wrong. 
- */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/capability.h> -#include <linux/skbuff.h> -#include <linux/init.h> -#include <linux/security.h> -#include <linux/mutex.h> -#include <linux/if_addr.h> -#include <linux/nsproxy.h> - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/string.h> - -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <net/arp.h> -#include <net/route.h> -#include <net/udp.h> -#include <net/sock.h> -#include <net/pkt_sched.h> -#include <net/fib_rules.h> -#include <net/rtnetlink.h> - -struct rtnl_link -{ - rtnl_doit_func doit; - rtnl_dumpit_func dumpit; -}; - -static DEFINE_MUTEX(rtnl_mutex); - -void rtnl_lock(void) -{ - mutex_lock(&rtnl_mutex); -} - -void __rtnl_unlock(void) -{ - mutex_unlock(&rtnl_mutex); -} - -void rtnl_unlock(void) -{ - /* This fellow will unlock it for us. */ - netdev_run_todo(); -} - -int rtnl_trylock(void) -{ - return mutex_trylock(&rtnl_mutex); -} - -int rtnl_is_locked(void) -{ - return mutex_is_locked(&rtnl_mutex); -} - -static struct rtnl_link *rtnl_msg_handlers[NPROTO]; - -static inline int rtm_msgindex(int msgtype) -{ - int msgindex = msgtype - RTM_BASE; - - /* - * msgindex < 0 implies someone tried to register a netlink - * control code. msgindex >= RTM_NR_MSGTYPES may indicate that - * the message type has not been added to linux/rtnetlink.h - */ - BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES); - - return msgindex; -} - -static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - tab = rtnl_msg_handlers[protocol]; - if (tab == NULL || tab[msgindex].doit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab ? tab[msgindex].doit : NULL; -} - -static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - tab = rtnl_msg_handlers[protocol]; - if (tab == NULL || tab[msgindex].dumpit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab ? tab[msgindex].dumpit : NULL; -} - -/** - * __rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. - * - * Returns 0 on success or a negative error code. 
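
A sketch of a protocol wiring up handlers through this interface; the family and message type are examples only, and my_doit()/my_dumpit() are illustrative handlers with the rtnl_doit_func/rtnl_dumpit_func signatures used throughout this file.

static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	return 0;			/* handle one request message */
}

static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;		/* fill skb with dump entries */
}

static int __init my_rtnl_init(void)
{
	/* rtnl_register() (below) is the variant that panics on failure */
	rtnl_register(PF_INET, RTM_GETROUTE, my_doit, my_dumpit);
	return 0;
}
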
- */
-int __rtnl_register(int protocol, int msgtype,
-		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
-{
-	struct rtnl_link *tab;
-	int msgindex;
-
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
-	msgindex = rtm_msgindex(msgtype);
-
-	tab = rtnl_msg_handlers[protocol];
-	if (tab == NULL) {
-		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
-		if (tab == NULL)
-			return -ENOBUFS;
-
-		rtnl_msg_handlers[protocol] = tab;
-	}
-
-	if (doit)
-		tab[msgindex].doit = doit;
-
-	if (dumpit)
-		tab[msgindex].dumpit = dumpit;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(__rtnl_register);
-
-/**
- * rtnl_register - Register a rtnetlink message type
- *
- * Identical to __rtnl_register() but panics on failure. This is useful
- * as failure of this function is very unlikely, it can only happen due
- * to lack of memory when allocating the chain to store all message
- * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continuing.
- */
-void rtnl_register(int protocol, int msgtype,
-		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
-{
-	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
-		panic("Unable to register rtnetlink message handler, "
-		      "protocol = %d, message type = %d\n",
-		      protocol, msgtype);
-}
-
-EXPORT_SYMBOL_GPL(rtnl_register);
-
-/**
- * rtnl_unregister - Unregister a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- *
- * Returns 0 on success or a negative error code.
- */
-int rtnl_unregister(int protocol, int msgtype)
-{
-	int msgindex;
-
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
-	msgindex = rtm_msgindex(msgtype);
-
-	if (rtnl_msg_handlers[protocol] == NULL)
-		return -ENOENT;
-
-	rtnl_msg_handlers[protocol][msgindex].doit = NULL;
-	rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(rtnl_unregister);
-
-/**
- * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
- * @protocol: Protocol family or PF_UNSPEC
- *
- * Identical to calling rtnl_unregister() for all registered message types
- * of a certain protocol family.
- */
-void rtnl_unregister_all(int protocol)
-{
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
-
-	kfree(rtnl_msg_handlers[protocol]);
-	rtnl_msg_handlers[protocol] = NULL;
-}
-
-EXPORT_SYMBOL_GPL(rtnl_unregister_all);
-
-static LIST_HEAD(link_ops);
-
-/**
- * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
- * @ops: struct rtnl_link_ops * to register
- *
- * The caller must hold the rtnl_mutex. This function should be used
- * by drivers that create devices during module initialization. It
- * must be called before registering the devices.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_link_register(struct rtnl_link_ops *ops)
-{
-	if (!ops->dellink)
-		ops->dellink = unregister_netdevice;
-
-	list_add_tail(&ops->list, &link_ops);
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(__rtnl_link_register);
-
-/**
- * rtnl_link_register - Register rtnl_link_ops with rtnetlink.
- * @ops: struct rtnl_link_ops * to register
- *
- * Returns 0 on success or a negative error code.
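
A sketch of a virtual-device driver registering link ops so userspace can create instances by kind; the fields shown are the ones rtnl_create_link()/rtnl_newlink() later in this file consume, and all names are illustrative.

static void my_link_setup(struct net_device *dev)
{
	ether_setup(dev);
}

static struct rtnl_link_ops my_link_ops __read_mostly = {
	.kind		= "mydev",
	.priv_size	= sizeof(struct my_priv),
	.setup		= my_link_setup,
};

static int __init my_link_init(void)
{
	return rtnl_link_register(&my_link_ops);	/* takes rtnl_lock() */
}

static void __exit my_link_exit(void)
{
	rtnl_link_unregister(&my_link_ops);	/* also deletes live instances */
}
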
- */ -int rtnl_link_register(struct rtnl_link_ops *ops) -{ - int err; - - rtnl_lock(); - err = __rtnl_link_register(ops); - rtnl_unlock(); - return err; -} - -EXPORT_SYMBOL_GPL(rtnl_link_register); - -static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) -{ - struct net_device *dev; -restart: - for_each_netdev(net, dev) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } - } -} - -void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) -{ - rtnl_lock(); - __rtnl_kill_links(net, ops); - rtnl_unlock(); -} -EXPORT_SYMBOL_GPL(rtnl_kill_links); - -/** - * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. - * @ops: struct rtnl_link_ops * to unregister - * - * The caller must hold the rtnl_mutex. - */ -void __rtnl_link_unregister(struct rtnl_link_ops *ops) -{ - struct net *net; - - for_each_net(net) { - __rtnl_kill_links(net, ops); - } - list_del(&ops->list); -} - -EXPORT_SYMBOL_GPL(__rtnl_link_unregister); - -/** - * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. - * @ops: struct rtnl_link_ops * to unregister - */ -void rtnl_link_unregister(struct rtnl_link_ops *ops) -{ - rtnl_lock(); - __rtnl_link_unregister(ops); - rtnl_unlock(); -} - -EXPORT_SYMBOL_GPL(rtnl_link_unregister); - -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) -{ - const struct rtnl_link_ops *ops; - - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; - } - return NULL; -} - -static size_t rtnl_link_get_size(const struct net_device *dev) -{ - const struct rtnl_link_ops *ops = dev->rtnl_link_ops; - size_t size; - - if (!ops) - return 0; - - size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ - nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ - - if (ops->get_size) - /* IFLA_INFO_DATA + nested data */ - size += nlmsg_total_size(sizeof(struct nlattr)) + - ops->get_size(dev); - - if (ops->get_xstats_size) - size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ - - return size; -} - -static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) -{ - const struct rtnl_link_ops *ops = dev->rtnl_link_ops; - struct nlattr *linkinfo, *data; - int err = -EMSGSIZE; - - linkinfo = nla_nest_start(skb, IFLA_LINKINFO); - if (linkinfo == NULL) - goto out; - - if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) - goto err_cancel_link; - if (ops->fill_xstats) { - err = ops->fill_xstats(skb, dev); - if (err < 0) - goto err_cancel_link; - } - if (ops->fill_info) { - data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) - goto err_cancel_link; - err = ops->fill_info(skb, dev); - if (err < 0) - goto err_cancel_data; - nla_nest_end(skb, data); - } - - nla_nest_end(skb, linkinfo); - return 0; - -err_cancel_data: - nla_nest_cancel(skb, data); -err_cancel_link: - nla_nest_cancel(skb, linkinfo); -out: - return err; -} - -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct 
rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - -#ifndef DDE_LINUX -void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -{ - struct rtattr *rta; - int size = RTA_LENGTH(attrlen); - - rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); - rta->rta_type = attrtype; - rta->rta_len = size; - memcpy(RTA_DATA(rta), data, attrlen); - memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); -} - -int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) -{ - struct sock *rtnl = net->rtnl; - int err = 0; - - NETLINK_CB(skb).dst_group = group; - if (echo) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); - if (echo) - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - return err; -} - -int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) -{ - struct sock *rtnl = net->rtnl; - - return nlmsg_unicast(rtnl, skb, pid); -} - -int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) -{ - struct sock *rtnl = net->rtnl; - int report = 0; - - if (nlh) - report = nlmsg_report(nlh); - - return nlmsg_notify(rtnl, skb, pid, group, report, flags); -} - -void rtnl_set_sk_err(struct net *net, u32 group, int error) -{ - struct sock *rtnl = net->rtnl; - - netlink_set_err(rtnl, 0, group, error); -} - -int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) -{ - struct nlattr *mx; - int i, valid = 0; - - mx = nla_nest_start(skb, RTA_METRICS); - if (mx == NULL) - return -ENOBUFS; - - for (i = 0; i < RTAX_MAX; i++) { - if (metrics[i]) { - valid++; - NLA_PUT_U32(skb, i+1, metrics[i]); - } - } - - if (!valid) { - nla_nest_cancel(skb, mx); - return 0; - } - - return nla_nest_end(skb, mx); - -nla_put_failure: - nla_nest_cancel(skb, mx); - return -EMSGSIZE; -} - -int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, - u32 ts, u32 tsage, long expires, u32 error) -{ - struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), - .rta_used = dst->__use, - .rta_clntref = atomic_read(&(dst->__refcnt)), - .rta_error = error, - .rta_id = id, - .rta_ts = ts, - .rta_tsage = tsage, - }; - - if (expires) - ci.rta_expires = jiffies_to_clock_t(expires); - - return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); -} - -EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); - -static void set_operstate(struct net_device *dev, unsigned char transition) -{ - unsigned char operstate = dev->operstate; - - switch(transition) { - case IF_OPER_UP: - if ((operstate == IF_OPER_DORMANT || - operstate == IF_OPER_UNKNOWN) && - !netif_dormant(dev)) - operstate = IF_OPER_UP; - break; - - case IF_OPER_DORMANT: - if (operstate == IF_OPER_UP || - operstate == IF_OPER_UNKNOWN) - operstate = IF_OPER_DORMANT; - break; - } - - if (dev->operstate != operstate) { - write_lock_bh(&dev_base_lock); - dev->operstate = operstate; - write_unlock_bh(&dev_base_lock); - netdev_state_change(dev); - } -} - -static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - const struct net_device_stats *b) -{ - a->rx_packets = b->rx_packets; - a->tx_packets = b->tx_packets; - a->rx_bytes = 
b->rx_bytes; - a->tx_bytes = b->tx_bytes; - a->rx_errors = b->rx_errors; - a->tx_errors = b->tx_errors; - a->rx_dropped = b->rx_dropped; - a->tx_dropped = b->tx_dropped; - - a->multicast = b->multicast; - a->collisions = b->collisions; - - a->rx_length_errors = b->rx_length_errors; - a->rx_over_errors = b->rx_over_errors; - a->rx_crc_errors = b->rx_crc_errors; - a->rx_frame_errors = b->rx_frame_errors; - a->rx_fifo_errors = b->rx_fifo_errors; - a->rx_missed_errors = b->rx_missed_errors; - - a->tx_aborted_errors = b->tx_aborted_errors; - a->tx_carrier_errors = b->tx_carrier_errors; - a->tx_fifo_errors = b->tx_fifo_errors; - a->tx_heartbeat_errors = b->tx_heartbeat_errors; - a->tx_window_errors = b->tx_window_errors; - - a->rx_compressed = b->rx_compressed; - a->tx_compressed = b->tx_compressed; -}; - -static inline size_t if_nlmsg_size(const struct net_device *dev) -{ - return NLMSG_ALIGN(sizeof(struct ifinfomsg)) - + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ - + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ - + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ - + nla_total_size(sizeof(struct rtnl_link_ifmap)) - + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ - + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ - + nla_total_size(4) /* IFLA_TXQLEN */ - + nla_total_size(4) /* IFLA_WEIGHT */ - + nla_total_size(4) /* IFLA_MTU */ - + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size(4) /* IFLA_MASTER */ - + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1) /* IFLA_LINKMODE */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ -} - -static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - int type, u32 pid, u32 seq, u32 change, - unsigned int flags) -{ - struct netdev_queue *txq; - struct ifinfomsg *ifm; - struct nlmsghdr *nlh; - const struct net_device_stats *stats; - struct nlattr *attr; - - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); - if (nlh == NULL) - return -EMSGSIZE; - - ifm = nlmsg_data(nlh); - ifm->ifi_family = AF_UNSPEC; - ifm->__ifi_pad = 0; - ifm->ifi_type = dev->type; - ifm->ifi_index = dev->ifindex; - ifm->ifi_flags = dev_get_flags(dev); - ifm->ifi_change = change; - - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U8(skb, IFLA_OPERSTATE, - netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN); - NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); - - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - - if (dev->master) - NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - - txq = netdev_get_tx_queue(dev, 0); - if (txq->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); - - if (dev->ifalias) - NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); - - if (1) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; - NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); - } - - if (dev->addr_len) { - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); - } - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; - - stats = dev_get_stats(dev); - copy_rtnl_link_stats(nla_data(attr), stats); - - if (dev->rtnl_link_ops) { - if (rtnl_link_fill(skb, dev) < 0) - goto nla_put_failure; - } - - return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - int idx; - int s_idx = cb->args[0]; - struct net_device *dev; - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) - break; -cont: - idx++; - } - cb->args[0] = idx; - - return skb->len; -} - -const struct nla_policy ifla_policy[IFLA_MAX+1] = { - [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, - [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, - [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, - [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, - [IFLA_MTU] = { .type = NLA_U32 }, - [IFLA_LINK] = { .type = NLA_U32 }, - [IFLA_TXQLEN] = { .type = NLA_U32 }, - [IFLA_WEIGHT] = { .type = NLA_U32 }, - [IFLA_OPERSTATE] = { .type = NLA_U8 }, - [IFLA_LINKMODE] = { .type = NLA_U8 }, - [IFLA_LINKINFO] = { .type = NLA_NESTED }, - [IFLA_NET_NS_PID] = { .type = NLA_U32 }, - [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, -}; - -static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { - [IFLA_INFO_KIND] = { .type = NLA_STRING }, - [IFLA_INFO_DATA] = { .type = NLA_NESTED }, -}; - -static struct net *get_net_ns_by_pid(pid_t pid) -{ - struct task_struct *tsk; - struct net *net; - - /* Lookup the network namespace */ - net = ERR_PTR(-ESRCH); - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) { - struct nsproxy *nsproxy; - nsproxy = task_nsproxy(tsk); - if (nsproxy) - net = get_net(nsproxy->net_ns); - } - rcu_read_unlock(); - return net; -} - -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) -{ - if (dev) { - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - return -EINVAL; - - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - return -EINVAL; - } - - return 0; -} - -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, - struct nlattr **tb, char *ifname, int modified) -{ - const struct net_device_ops *ops = dev->netdev_ops; - int send_addr_notify = 0; - int err; - - if (tb[IFLA_NET_NS_PID]) { - struct net *net; - net = 
get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); - if (IS_ERR(net)) { - err = PTR_ERR(net); - goto errout; - } - err = dev_change_net_namespace(dev, net, ifname); - put_net(net); - if (err) - goto errout; - modified = 1; - } - - if (tb[IFLA_MAP]) { - struct rtnl_link_ifmap *u_map; - struct ifmap k_map; - - if (!ops->ndo_set_config) { - err = -EOPNOTSUPP; - goto errout; - } - - if (!netif_device_present(dev)) { - err = -ENODEV; - goto errout; - } - - u_map = nla_data(tb[IFLA_MAP]); - k_map.mem_start = (unsigned long) u_map->mem_start; - k_map.mem_end = (unsigned long) u_map->mem_end; - k_map.base_addr = (unsigned short) u_map->base_addr; - k_map.irq = (unsigned char) u_map->irq; - k_map.dma = (unsigned char) u_map->dma; - k_map.port = (unsigned char) u_map->port; - - err = ops->ndo_set_config(dev, &k_map); - if (err < 0) - goto errout; - - modified = 1; - } - - if (tb[IFLA_ADDRESS]) { - struct sockaddr *sa; - int len; - - if (!ops->ndo_set_mac_address) { - err = -EOPNOTSUPP; - goto errout; - } - - if (!netif_device_present(dev)) { - err = -ENODEV; - goto errout; - } - - len = sizeof(sa_family_t) + dev->addr_len; - sa = kmalloc(len, GFP_KERNEL); - if (!sa) { - err = -ENOMEM; - goto errout; - } - sa->sa_family = dev->type; - memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), - dev->addr_len); - err = ops->ndo_set_mac_address(dev, sa); - kfree(sa); - if (err) - goto errout; - send_addr_notify = 1; - modified = 1; - } - - if (tb[IFLA_MTU]) { - err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); - if (err < 0) - goto errout; - modified = 1; - } - - /* - * Interface selected by interface index but interface - * name provided implies that a name change has been - * requested. - */ - if (ifm->ifi_index > 0 && ifname[0]) { - err = dev_change_name(dev, ifname); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_IFALIAS]) { - err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), - nla_len(tb[IFLA_IFALIAS])); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_BROADCAST]) { - nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); - send_addr_notify = 1; - } - - if (ifm->ifi_flags || ifm->ifi_change) { - unsigned int flags = ifm->ifi_flags; - - /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ - if (ifm->ifi_change) - flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); - err = dev_change_flags(dev, flags); - if (err < 0) - goto errout; - } - - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - - if (tb[IFLA_LINKMODE]) { - write_lock_bh(&dev_base_lock); - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - write_unlock_bh(&dev_base_lock); - } - - err = 0; - -errout: - if (err < 0 && modified && net_ratelimit()) - printk(KERN_WARNING "A link change request failed with " - "some changes comitted already. 
Interface %s may " - "have been left with an inconsistent configuration, " - "please check.\n", dev->name); - - if (send_addr_notify) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; -} - -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct net *net = sock_net(skb->sk); - struct ifinfomsg *ifm; - struct net_device *dev; - int err; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(net, ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if ((err = validate_linkmsg(dev, tb)) < 0) - goto errout_dev; - - err = do_setlink(dev, ifm, tb, ifname, 0); -errout_dev: - dev_put(dev); -errout: - return err; -} - -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct net *net = sock_net(skb->sk); - const struct rtnl_link_ops *ops; - struct net_device *dev; - struct ifinfomsg *ifm; - char ifname[IFNAMSIZ]; - struct nlattr *tb[IFLA_MAX+1]; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - return err; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); - else - return -EINVAL; - - if (!dev) - return -ENODEV; - - ops = dev->rtnl_link_ops; - if (!ops) - return -EOPNOTSUPP; - - ops->dellink(dev); - return 0; -} - -struct net_device *rtnl_create_link(struct net *net, char *ifname, - const struct rtnl_link_ops *ops, struct nlattr *tb[]) -{ - int err; - struct net_device *dev; - - err = -ENOMEM; - dev = alloc_netdev(ops->priv_size, ifname, ops->setup); - if (!dev) - goto err; - - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - - dev_net_set(dev, net); - dev->rtnl_link_ops = ops; - - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); - if (tb[IFLA_BROADCAST]) - memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), - nla_len(tb[IFLA_BROADCAST])); - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - if (tb[IFLA_LINKMODE]) - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - - return dev; - -err_free: - free_netdev(dev); -err: - return ERR_PTR(err); -} - -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct net *net = sock_net(skb->sk); - const struct rtnl_link_ops *ops; - struct net_device *dev; - struct ifinfomsg *ifm; - char kind[MODULE_NAME_LEN]; - char ifname[IFNAMSIZ]; - struct nlattr *tb[IFLA_MAX+1]; - struct nlattr *linkinfo[IFLA_INFO_MAX+1]; - int err; - -#ifdef CONFIG_MODULES -replay: -#endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - return err; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = 
__dev_get_by_index(net, ifm->ifi_index); - else if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; - - if ((err = validate_linkmsg(dev, tb)) < 0) - return err; - - if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy); - if (err < 0) - return err; - } else - memset(linkinfo, 0, sizeof(linkinfo)); - - if (linkinfo[IFLA_INFO_KIND]) { - nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); - } else { - kind[0] = '\0'; - ops = NULL; - } - - if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; - - if (ops) { - if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested(attr, ops->maxtype, - linkinfo[IFLA_INFO_DATA], - ops->policy); - if (err < 0) - return err; - data = attr; - } - if (ops->validate) { - err = ops->validate(tb, data); - if (err < 0) - return err; - } - } - - if (dev) { - int modified = 0; - - if (nlh->nlmsg_flags & NLM_F_EXCL) - return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_REPLACE) - return -EOPNOTSUPP; - - if (linkinfo[IFLA_INFO_DATA]) { - if (!ops || ops != dev->rtnl_link_ops || - !ops->changelink) - return -EOPNOTSUPP; - - err = ops->changelink(dev, tb, data); - if (err < 0) - return err; - modified = 1; - } - - return do_setlink(dev, ifm, tb, ifname, modified); - } - - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) - return -ENODEV; - - if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) - return -EOPNOTSUPP; - if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) - return -EOPNOTSUPP; - - if (!ops) { -#ifdef CONFIG_MODULES - if (kind[0]) { - __rtnl_unlock(); - request_module("rtnl-link-%s", kind); - rtnl_lock(); - ops = rtnl_link_ops_get(kind); - if (ops) - goto replay; - } -#endif - return -EOPNOTSUPP; - } - - if (!ifname[0]) - snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - - dev = rtnl_create_link(net, ifname, ops, tb); - - if (IS_ERR(dev)) - err = PTR_ERR(dev); - else if (ops->newlink) - err = ops->newlink(dev, tb, data); - else - err = register_netdevice(dev); - - if (err < 0 && !IS_ERR(dev)) - free_netdev(dev); - return err; - } -} - -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) -{ - struct net *net = sock_net(skb->sk); - struct ifinfomsg *ifm; - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev = NULL; - struct sk_buff *nskb; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - return err; - - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) { - dev = dev_get_by_index(net, ifm->ifi_index); - if (dev == NULL) - return -ENODEV; - } else - return -EINVAL; - - nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); - if (nskb == NULL) { - err = -ENOBUFS; - goto errout; - } - - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, 0, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in if_nlmsg_size */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(nskb); - goto errout; - } - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); -errout: - dev_put(dev); - - return err; -} - -static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) -{ - int idx; - int s_idx = cb->family; - - if (s_idx == 0) - s_idx = 1; - for (idx=1; idx<NPROTO; idx++) { - int type = cb->nlh->nlmsg_type-RTM_BASE; - if (idx < s_idx || idx == PF_PACKET) - continue; - if (rtnl_msg_handlers[idx] == NULL || - rtnl_msg_handlers[idx][type].dumpit == NULL) - continue; - if (idx > s_idx) - memset(&cb->args[0], 0, sizeof(cb->args)); - if 
(rtnl_msg_handlers[idx][type].dumpit(skb, cb)) - break; - } - cb->family = idx; - - return skb->len; -} -#endif /* DDE_LINUX */ - -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) -{ - struct net *net = dev_net(dev); -#ifndef DDE_LINUX - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in if_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_LINK, err); -#endif /* DDE_LINUX */ -} - -#ifndef DDE_LINUX -/* Protected by RTNL semaphore. */ -static struct rtattr **rta_buf; -static int rtattr_max; - -/* Process one rtnetlink message. */ - -static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) -{ - struct net *net = sock_net(skb->sk); - rtnl_doit_func doit; - int sz_idx, kind; - int min_len; - int family; - int type; - int err; - - type = nlh->nlmsg_type; - if (type > RTM_MAX) - return -EOPNOTSUPP; - - type -= RTM_BASE; - - /* All the messages must have at least 1 byte length */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) - return 0; - - family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; - if (family >= NPROTO) - return -EAFNOSUPPORT; - - sz_idx = type>>2; - kind = type&3; - - if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { - struct sock *rtnl; - rtnl_dumpit_func dumpit; - - dumpit = rtnl_get_dumpit(family, type); - if (dumpit == NULL) - return -EOPNOTSUPP; - - __rtnl_unlock(); - rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); - rtnl_lock(); - return err; - } - - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - - doit = rtnl_get_doit(family, type); - if (doit == NULL) - return -EOPNOTSUPP; - - return doit(skb, nlh, (void *)&rta_buf[0]); -} - -static void rtnetlink_rcv(struct sk_buff *skb) -{ - rtnl_lock(); - netlink_rcv_skb(skb, &rtnetlink_rcv_msg); - rtnl_unlock(); -} - -static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - switch (event) { - case NETDEV_UNREGISTER: - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); - break; - case NETDEV_REGISTER: - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); - break; - case NETDEV_UP: - case NETDEV_DOWN: - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); - break; - case NETDEV_CHANGE: - case NETDEV_GOING_DOWN: - break; - default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block rtnetlink_dev_notifier = { - .notifier_call = rtnetlink_event, -}; - - -static int rtnetlink_net_init(struct net *net) -{ - struct sock *sk; - sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); - if (!sk) - return -ENOMEM; - net->rtnl = sk; - return 0; -} - -static void 
rtnetlink_net_exit(struct net *net) -{ - netlink_kernel_release(net->rtnl); - net->rtnl = NULL; -} - -static struct pernet_operations rtnetlink_net_ops = { - .init = rtnetlink_net_init, - .exit = rtnetlink_net_exit, -}; - -void __init rtnetlink_init(void) -{ - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - - if (register_pernet_subsys(&rtnetlink_net_ops)) - panic("rtnetlink_init: cannot initialize rtnetlink\n"); - - netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); - register_netdevice_notifier(&rtnetlink_dev_notifier); - - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); - - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); -} - -EXPORT_SYMBOL(__rta_fill); -EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl_lock); -EXPORT_SYMBOL(rtnl_trylock); -EXPORT_SYMBOL(rtnl_unlock); -EXPORT_SYMBOL(rtnl_is_locked); -EXPORT_SYMBOL(rtnl_unicast); -EXPORT_SYMBOL(rtnl_notify); -EXPORT_SYMBOL(rtnl_set_sk_err); -EXPORT_SYMBOL(rtnl_create_link); -EXPORT_SYMBOL(ifla_policy); -#endif /* !DDE_LINUX */ diff --git a/libdde_linux26/lib/src/net/core/skbuff.c b/libdde_linux26/lib/src/net/core/skbuff.c deleted file mode 100644 index 40d64a88..00000000 --- a/libdde_linux26/lib/src/net/core/skbuff.c +++ /dev/null @@ -1,2956 +0,0 @@ -/* - * Routines having to do with the 'struct sk_buff' memory handlers. - * - * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> - * Florian La Roche <rzsfl@rz.uni-sb.de> - * - * Fixes: - * Alan Cox : Fixed the worst of the load - * balancer bugs. - * Dave Platt : Interrupt stacking fix. - * Richard Kooijman : Timestamp fixes. - * Alan Cox : Changed buffer format. - * Alan Cox : destructor hook for AF_UNIX etc. - * Linus Torvalds : Better skb_clone. - * Alan Cox : Added skb_copy. - * Alan Cox : Added all the changed routines Linus - * only put in the headers - * Ray VanTassle : Fixed --skb->lock in free - * Alan Cox : skb_copy copy arp field - * Andi Kleen : slabified it. - * Robert Olsson : Removed skb_head_pool - * - * NOTE: - * The __skb_ routines should be called with interrupts - * disabled, or you better be *real* sure that the operation is atomic - * with respect to whatever list is being frobbed (e.g. via lock_sock() - * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#ifdef CONFIG_NET_CLS_ACT -#include <net/pkt_sched.h> -#endif -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/splice.h> -#include <linux/cache.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <linux/scatterlist.h> - -#include <net/protocol.h> -#include <net/dst.h> -#include <net/sock.h> -#include <net/checksum.h> -#ifndef DDE_LINUX -#include <net/xfrm.h> -#endif /* DDE_LINUX */ - -#include "local.h" - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include "kmap_skb.h" - -static struct kmem_cache *skbuff_head_cache __read_mostly; -static struct kmem_cache *skbuff_fclone_cache __read_mostly; - -static void sock_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - put_page(buf->page); -} - -static void sock_pipe_buf_get(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - get_page(buf->page); -} - -static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - - -/* Pipe buffer operations for a socket. */ -static struct pipe_buf_operations sock_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = sock_pipe_buf_release, - .steal = sock_pipe_buf_steal, - .get = sock_pipe_buf_get, -}; - -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - -/** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. - */ -void skb_over_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -void skb_under_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - -/** - * __alloc_skb - allocate a network buffer - * @size: size to allocate - * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb - * @node: numa node to allocate memory on - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. 
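 *
 * A minimal usage sketch (hypothetical caller, not part of this file;
 * `len' and `data' are assumed names): most code goes through the
 * alloc_skb() wrapper rather than calling __alloc_skb() directly:
 *
 *	struct sk_buff *skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC);
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, NET_IP_ALIGN);        (reserve headroom first)
 *	memcpy(skb_put(skb, len), data, len);  (then append the payload)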
- */ -struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) -{ - struct kmem_cache *cache; - struct skb_shared_info *shinfo; - struct sk_buff *skb; - u8 *data; - - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); - if (!skb) { - printk("kmem_cache_alloc_node fails\n"); - goto out; - } - - size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); - if (!data) { - printk("kmalloc_node_track_caller %d fails\n", - size + sizeof(struct skb_shared_info)); - goto nodata; - } - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! - */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); - atomic_set(&skb->users, 1); - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->del_data = NULL; - skb->pre_del_func = NULL; - /* make sure we initialize shinfo sequentially */ - shinfo = skb_shinfo(skb); - atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->frag_list = NULL; - - if (fclone) { - struct sk_buff *child = skb + 1; - atomic_t *fclone_ref = (atomic_t *) (child + 1); - - skb->fclone = SKB_FCLONE_ORIG; - atomic_set(fclone_ref, 1); - - child->fclone = SKB_FCLONE_UNAVAILABLE; - } -out: - return skb; -nodata: - kmem_cache_free(cache, skb); - skb = NULL; - goto out; -} - -/** - * __netdev_alloc_skb - allocate an skbuff for rx on a specific device - * @dev: network device to receive on - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) -{ - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; - struct sk_buff *skb; - - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = dev; - } - return skb; -} - -struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) -{ - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; - struct page *page; - - page = alloc_pages_node(node, gfp_mask, 0); - return page; -} -EXPORT_SYMBOL(__netdev_alloc_page); - -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size) -{ - skb_fill_page_desc(skb, i, page, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += size; -} -EXPORT_SYMBOL(skb_add_rx_frag); - -/** - * dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. 
Although this function - * allocates memory it can be called from an interrupt. - */ -struct sk_buff *dev_alloc_skb(unsigned int length) -{ - /* - * There is more code here than it seems: - * __dev_alloc_skb is an inline - */ - return __dev_alloc_skb(length, GFP_ATOMIC); -} -EXPORT_SYMBOL(dev_alloc_skb); - -static void skb_drop_list(struct sk_buff **listp) -{ - struct sk_buff *list = *listp; - - *listp = NULL; - - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); -} - -static inline void skb_drop_fraglist(struct sk_buff *skb) -{ - skb_drop_list(&skb_shinfo(skb)->frag_list); -} - -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list = list->next) - skb_get(list); -} - -static void skb_release_data(struct sk_buff *skb) -{ - if (!skb->cloned || - !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, - &skb_shinfo(skb)->dataref)) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - kfree(skb->head); - } -} - -/* - * Free an skbuff by memory without cleaning the state. - */ -static void kfree_skbmem(struct sk_buff *skb) -{ - struct sk_buff *other; - atomic_t *fclone_ref; - - switch (skb->fclone) { - case SKB_FCLONE_UNAVAILABLE: - kmem_cache_free(skbuff_head_cache, skb); - break; - - case SKB_FCLONE_ORIG: - fclone_ref = (atomic_t *) (skb + 2); - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, skb); - break; - - case SKB_FCLONE_CLONE: - fclone_ref = (atomic_t *) (skb + 1); - other = skb - 1; - - /* The clone portion is available for - * fast-cloning again. - */ - skb->fclone = SKB_FCLONE_UNAVAILABLE; - - if (atomic_dec_and_test(fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, other); - break; - } -} - -static void skb_release_head_state(struct sk_buff *skb) -{ -#ifndef DDE_LINUX - dst_release(skb->dst); -#endif -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif - if (skb->destructor) { - WARN_ON(in_irq()); - skb->destructor(skb); - } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(skb->nf_bridge); -#endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif -} - -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) -{ - skb_release_head_state(skb); - skb_release_data(skb); -} - -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) -{ -#ifdef DDE_LINUX - if (skb->del_data && skb->pre_del_func - && skb->pre_del_func(skb, skb->del_data)) - return; -#endif - skb_release_all(skb); - kfree_skbmem(skb); -} - -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. 
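 *
 * A minimal sketch of the reference rule (hypothetical caller): every
 * skb_get() must be balanced by one kfree_skb(); only the final drop
 * actually releases the buffer:
 *
 *	skb_get(skb);    (users: 1 -> 2)
 *	kfree_skb(skb);  (users: 2 -> 1, buffer still alive)
 *	kfree_skb(skb);  (users: 1 -> 0, buffer is freed)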
- */ -void kfree_skb(struct sk_buff *skb) -{ - if (unlikely(!skb)) - return; -#ifdef DDE_LINUX - if (atomic_read(&skb->users) == 0) { - __kfree_skb(skb); - return; - } -#endif - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) - return; - __kfree_skb(skb); -} - -/** - * skb_recycle_check - check if skb can be reused for receive - * @skb: buffer - * @skb_size: minimum receive buffer size - * - * Checks that the skb passed in is not shared or cloned, and - * that it is linear and its head portion at least as large as - * skb_size so that it can be recycled as a receive buffer. - * If these conditions are met, this function does any necessary - * reference count dropping and cleans up the skbuff as if it - * just came from __alloc_skb(). - */ -int skb_recycle_check(struct sk_buff *skb, int skb_size) -{ - struct skb_shared_info *shinfo; - - if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return 0; - - skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_pointer(skb) - skb->head < skb_size) - return 0; - - if (skb_shared(skb) || skb_cloned(skb)) - return 0; - - skb_release_head_state(skb); - shinfo = skb_shinfo(skb); - atomic_set(&shinfo->dataref, 1); - shinfo->nr_frags = 0; - shinfo->gso_size = 0; - shinfo->gso_segs = 0; - shinfo->gso_type = 0; - shinfo->ip6_frag_id = 0; - shinfo->frag_list = NULL; - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); - - return 1; -} -EXPORT_SYMBOL(skb_recycle_check); - -static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - new->tstamp = old->tstamp; - new->dev = old->dev; - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_XFRM - new->sp = secpath_get(old->sp); -#endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; - new->local_df = old->local_df; - new->pkt_type = old->pkt_type; - new->ip_summed = old->ip_summed; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif - new->protocol = old->protocol; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif -#endif - new->vlan_tci = old->vlan_tci; - - skb_copy_secmark(new, old); -} - -static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) -{ -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - __copy_skb_header(n, skb); - - C(len); - C(data_len); - C(mac_len); - n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; - n->cloned = 1; - n->nohdr = 0; - n->destructor = NULL; - C(iif); - C(tail); - C(end); - C(head); - C(data); - C(truesize); -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - C(do_not_encrypt); - C(requeue); -#endif - atomic_set(&n->users, 1); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; -#undef C -} - -/** - * skb_morph - morph one skb into another - * @dst: the skb to receive the contents - * @src: the skb to supply the contents - * - * This is identical to skb_clone except that the target skb is - * supplied by the user. - * - * The target skb is returned upon exit. - */ -struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) -{ - skb_release_all(dst); - return __skb_clone(dst, src); -} -EXPORT_SYMBOL_GPL(skb_morph); - -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. - */ - -struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) -{ - struct sk_buff *n; - - n = skb + 1; - if (skb->fclone == SKB_FCLONE_ORIG && - n->fclone == SKB_FCLONE_UNAVAILABLE) { - atomic_t *fclone_ref = (atomic_t *) (n + 1); - n->fclone = SKB_FCLONE_CLONE; - atomic_inc(fclone_ref); - } else { - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; - n->fclone = SKB_FCLONE_UNAVAILABLE; - } - - return __skb_clone(n, skb); -} - -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - - __copy_skb_header(new, old); - -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ - new->transport_header += offset; - new->network_header += offset; - new->mac_header += offset; -#endif - skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; - skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; - skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; -} - -/** - * skb_copy - create private copy of an sk_buff - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data. This is used when the - * caller wishes to modify the data and needs a private copy of the - * data to alter. Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * As by-product this function converts non-linear &sk_buff to linear - * one, so that &sk_buff becomes completely private and caller is allowed - * to modify all the data of returned buffer. This means that this - * function is not recommended for use in circumstances when only - * header is going to be modified. Use pskb_copy() instead. 
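 *
 * A minimal sketch of that choice (hypothetical caller; `new' is an
 * assumed variable):
 *
 *	new = pskb_copy(skb, GFP_ATOMIC);  (private header, frags shared)
 *	new = skb_copy(skb, GFP_ATOMIC);   (fully private, linearized copy)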
- */ - -struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) -{ - int headerlen = skb->data - skb->head; - /* - * Allocate the copy buffer - */ - struct sk_buff *n; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n = alloc_skb(skb->end + skb->data_len, gfp_mask); -#else - n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); -#endif - if (!n) - return NULL; - - /* Set the data pointer */ - skb_reserve(n, headerlen); - /* Set the tail pointer and length */ - skb_put(n, skb->len); - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. - */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n = alloc_skb(skb->end, gfp_mask); -#else - n = alloc_skb(skb->end - skb->head, gfp_mask); -#endif - if (!n) - goto out; - - /* Set the data pointer */ - skb_reserve(n, skb->data - skb->head); - /* Set the tail pointer and length */ - skb_put(n, skb_headlen(skb)); - /* Copy the bytes */ - skb_copy_from_linear_data(skb, n->data, n->len); - - n->truesize += skb->data_len; - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); -out: - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - gfp_t gfp_mask) -{ - int i; - u8 *data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - int size = nhead + skb->end + ntail; -#else - int size = nhead + (skb->end - skb->head) + ntail; -#endif - long off; - - BUG_ON(nhead < 0); - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. 
*/ -#ifdef NET_SKBUFF_DATA_USES_OFFSET - memcpy(data + nhead, skb->head, skb->tail); -#else - memcpy(data + nhead, skb->head, skb->tail - skb->head); -#endif - memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data + nhead) - skb->head; - - skb->head = data; - skb->data += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; - off = nhead; -#else - skb->end = skb->head + size; -#endif - /* {transport,network,mac}_header and tail are relative to skb->head */ - skb->tail += off; - skb->transport_header += off; - skb->network_header += off; - skb->mac_header += off; - skb->csum_start += nhead; - skb->cloned = 0; - skb->hdr_len = 0; - skb->nohdr = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - skb2 = pskb_copy(skb, GFP_ATOMIC); - else { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, - GFP_ATOMIC)) { - kfree_skb(skb2); - skb2 = NULL; - } - } - return skb2; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. - * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - */ -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - gfp_t gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - int oldheadroom = skb_headroom(skb); - int head_copy_len, head_copy_off; - int off; - - if (!n) - return NULL; - - skb_reserve(n, newheadroom); - - /* Set the tail pointer and length */ - skb_put(n, skb->len); - - head_copy_len = oldheadroom; - head_copy_off = 0; - if (newheadroom <= head_copy_len) - head_copy_len = newheadroom; - else - head_copy_off = newheadroom - head_copy_len; - - /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); - - copy_skb_header(n, skb); - - off = newheadroom - oldheadroom; - n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - n->transport_header += off; - n->network_header += off; - n->mac_header += off; -#endif - - return n; -} - -/** - * skb_pad - zero pad the tail of an skb - * @skb: buffer to pad - * @pad: space to pad - * - * Ensure that a buffer is followed by a padding area that is zero - * filled. Used by network drivers which may DMA or transfer data - * beyond the buffer end onto the wire. - * - * May return error in out of memory cases. The skb is freed on error. 
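 *
 * A minimal sketch (hypothetical Ethernet driver): pad runt frames up to
 * the minimum frame size before transmission; on failure the skb has
 * already been freed, so it must not be touched again:
 *
 *	if (skb->len < ETH_ZLEN &&
 *	    skb_pad(skb, ETH_ZLEN - skb->len))
 *		return NETDEV_TX_OK;  (skb was freed by skb_pad)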
- */ - -int skb_pad(struct sk_buff *skb, int pad) -{ - int err; - int ntail; - - /* If the skbuff is non linear tailroom is always zero.. */ - if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return 0; - } - - ntail = skb->data_len + pad - (skb->end - skb->tail); - if (likely(skb_cloned(skb) || ntail > 0)) { - err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); - if (unlikely(err)) - goto free_skb; - } - - /* FIXME: The use of this function with non-linear skb's really needs - * to be audited. - */ - err = skb_linearize(skb); - if (unlikely(err)) - goto free_skb; - - memset(skb->data + skb->len, 0, pad); - return 0; - -free_skb: - kfree_skb(skb); - return err; -} - -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. - */ -unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb_tail_pointer(skb); - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - if (unlikely(skb->tail > skb->end)) - skb_over_panic(skb, len, __builtin_return_address(0)); - return tmp; -} -EXPORT_SYMBOL(skb_put); - -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ -unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - if (unlikely(skb->data<skb->head)) - skb_under_panic(skb, len, __builtin_return_address(0)); - return skb->data; -} -EXPORT_SYMBOL(skb_push); - -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. - */ -unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); -} -EXPORT_SYMBOL(skb_pull); - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - * The skb must be linear. - */ -void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) - __skb_trim(skb, len); -} -EXPORT_SYMBOL(skb_trim); - -/* Trims skb to length len. It can change skb pointers. 
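 * Callers normally reach this through the pskb_trim() wrapper; a minimal
 * sketch (hypothetical receive path, `expected_len' assumed):
 *
 *	if (pskb_trim(skb, expected_len))
 *		goto drop;  (can only fail on allocation failure)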
- */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len) -{ - struct sk_buff **fragp; - struct sk_buff *frag; - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - int err; - - if (skb_cloned(skb) && - unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) - return err; - - i = 0; - if (offset >= len) - goto drop_pages; - - for (; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - - if (end < len) { - offset = end; - continue; - } - - skb_shinfo(skb)->frags[i++].size = len - offset; - -drop_pages: - skb_shinfo(skb)->nr_frags = i; - - for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - goto done; - } - - for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); - fragp = &frag->next) { - int end = offset + frag->len; - - if (skb_shared(frag)) { - struct sk_buff *nfrag; - - nfrag = skb_clone(frag, GFP_ATOMIC); - if (unlikely(!nfrag)) - return -ENOMEM; - - nfrag->next = frag->next; - kfree_skb(frag); - frag = nfrag; - *fragp = frag; - } - - if (end < len) { - offset = end; - continue; - } - - if (end > len && - unlikely((err = pskb_trim(frag, len - offset)))) - return err; - - if (frag->next) - skb_drop_list(&frag->next); - break; - } - -done: - if (len > skb_headlen(skb)) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - skb->len = len; - skb->data_len = 0; - skb_set_tail_pointer(skb, len); - } - - return 0; -} - -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes sense only on a fragmented &sk_buff, - * it expands the header, moving its tail forward and copying necessary - * data from the fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or the value of the new tail of the skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. - */ -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) -{ - /* If the skb does not have enough free space at the tail, get a new one - * plus 128 bytes for future expansions. If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - int i, k, eat = (skb->tail + delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, - GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) - BUG(); - - /* Optimization: no fragments, no reason to preestimate - * size of pulled pages. Superb. - */ - if (!skb_shinfo(skb)->frag_list) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need to update the frag list, we are in trouble. - * Certainly, it is possible to add an offset to the skb data, - * but taking into account that pulling is expected to - * be a very rare operation, it is worth fighting against - * further bloating of the skb head and crucifying ourselves here instead. 
- * Pure masochism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - BUG_ON(!list); - - if (list->len <= eat) { - /* Eaten as a whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (!clone) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (!pskb_pull(list, eat)) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. */ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb_tail_pointer(skb); -} - -/* Copy some data bits from skb to kernel buffer. */ - -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -{ - int i, copy; - int start = skb_headlen(skb); - - if (offset > (int)skb->len - len) - goto fault; - - /* Copy header. */ - if ((copy = start - offset) > 0) { - if (copy > len) - copy = len; - skb_copy_from_linear_data_offset(skb, offset, to, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, - vaddr + skb_shinfo(skb)->frags[i].page_offset+ - offset - start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - } - if (!len) - return 0; - -fault: - return -EFAULT; -} - -/* - * Callback from splice_to_pipe(), if we need to release some pages - * at the end of the spd in case we errored out in filling the pipe. 
- */ -static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) -{ - put_page(spd->pages[i]); -} - -static inline struct page *linear_to_page(struct page *page, unsigned int len, - unsigned int offset) -{ - struct page *p = alloc_pages(GFP_KERNEL, 0); - - if (!p) - return NULL; - memcpy(page_address(p) + offset, page_address(page) + offset, len); - - return p; -} - -/* - * Fill page/offset/length into spd, if it can hold more pages. - */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, - unsigned int len, unsigned int offset, - struct sk_buff *skb, int linear) -{ - if (unlikely(spd->nr_pages == PIPE_BUFFERS)) - return 1; - - if (linear) { - page = linear_to_page(page, len, offset); - if (!page) - return 1; - } else - get_page(page); - - spd->pages[spd->nr_pages] = page; - spd->partial[spd->nr_pages].len = len; - spd->partial[spd->nr_pages].offset = offset; - spd->nr_pages++; - - return 0; -} - -static inline void __segment_seek(struct page **page, unsigned int *poff, - unsigned int *plen, unsigned int off) -{ - *poff += off; - *page += *poff / PAGE_SIZE; - *poff = *poff % PAGE_SIZE; - *plen -= off; -} - -static inline int __splice_segment(struct page *page, unsigned int poff, - unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear) -{ - if (!*len) - return 1; - - /* skip this segment if already processed */ - if (*off >= plen) { - *off -= plen; - return 0; - } - - /* ignore any bits we already processed */ - if (*off) { - __segment_seek(&page, &poff, &plen, *off); - *off = 0; - } - - do { - unsigned int flen = min(*len, plen); - - /* the linear region may spread across several pages */ - flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - - if (spd_fill_page(spd, page, flen, poff, skb, linear)) - return 1; - - __segment_seek(&page, &poff, &plen, flen); - *len -= flen; - - } while (*len && plen); - - return 0; -} - -/* - * Map linear and fragment data from the skb to spd. It reports failure if the - * pipe is full or if we already spliced the requested length. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) -{ - int seg; - - /* - * map the linear part - */ - if (__splice_segment(virt_to_page(skb->data), - (unsigned long) skb->data & (PAGE_SIZE - 1), - skb_headlen(skb), - offset, len, skb, spd, 1)) - return 1; - - /* - * then map the fragments - */ - for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { - const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - - if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0)) - return 1; - } - - return 0; -} - -/* - * Map data from the skb to a pipe. Should handle both the linear part, - * the fragments, and the frag list. It does NOT handle frag lists within - * the frag list, if such a thing exists. We'd probably need to recurse to - * handle that cleanly. - */ -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, - struct pipe_inode_info *pipe, unsigned int tlen, - unsigned int flags) -{ - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; - struct splice_pipe_desc spd = { - .pages = pages, - .partial = partial, - .flags = flags, - .ops = &sock_pipe_buf_ops, - .spd_release = sock_spd_release, - }; - - /* - * __skb_splice_bits() only fails if the output has no room left, - * so no point in going over the frag_list for the error case. 
- */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) - goto done; - else if (!tlen) - goto done; - - /* - * now see if we have a frag_list to map - */ - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) - break; - } - } - -done: - if (spd.nr_pages) { - struct sock *sk = skb->sk; - int ret; - - /* - * Drop the socket lock, otherwise we have reverse - * locking dependencies between sk_lock and i_mutex - * here as compared to sendfile(). We enter here - * with the socket lock held, and splice_to_pipe() will - * grab the pipe inode lock. For sendfile() emulation, - * we call into ->sendpage() with the i_mutex lock held - * and networking will grab the socket lock. - */ - release_sock(sk); - ret = splice_to_pipe(pipe, &spd); - lock_sock(sk); - return ret; - } - - return 0; -} - -/** - * skb_store_bits - store bits from kernel buffer to skb - * @skb: destination buffer - * @offset: offset in destination - * @from: source buffer - * @len: number of bytes to copy - * - * Copy the specified number of bytes from the source buffer to the - * destination skb. This function handles all the messy bits of - * traversing fragment lists and such. - */ - -int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) -{ - int i, copy; - int start = skb_headlen(skb); - - if (offset > (int)skb->len - len) - goto fault; - - if ((copy = start - offset) > 0) { - if (copy > len) - copy = len; - skb_copy_to_linear_data_offset(skb, offset, from, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - int end; - - WARN_ON(start > offset + len); - - end = start + frag->size; - if ((copy = end - offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(frag); - memcpy(vaddr + frag->page_offset + offset - start, - from, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_store_bits(list, offset - start, - from, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - start = end; - } - } - if (!len) - return 0; - -fault: - return -EFAULT; -} - -EXPORT_SYMBOL(skb_store_bits); - -/* Checksum skb data. */ - -__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data + offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - __wsum csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - __wsum csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - BUG_ON(len); - - return csum; -} - -/* Both of above in one bottle. */ - -__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, __wsum csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data + offset, to, - copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - __wsum csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + - frag->page_offset + - offset - start, to, - copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - __wsum csum2; - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, - offset - start, - to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - BUG_ON(len); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - __wsum csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csstart = skb->csum_start - skb_headroom(skb); - else - csstart = skb_headlen(skb); - - BUG_ON(csstart > skb_headlen(skb)); - - skb_copy_from_linear_data(skb, to, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, - skb->len - csstart, 0); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum_offset; - - *((__sum16 *)(to + csstuff)) = csum_fold(csum); - } -} - -/** - * skb_dequeue - remove from the head of the queue - * 
@list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. - */ - -struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_queue_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. - */ -void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) - kfree_skb(skb); -} - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * @list: list to use - * - * Remove a packet from a list. The list locks are taken and this - * function is atomic with respect to other list locked calls. - * - * You must know what list the SKB is on. - */ -void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_unlink(skb, list); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. 
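 *
 * A minimal sketch of the usual producer/consumer pattern with these
 * locked helpers (hypothetical queue named `rxq'):
 *
 *	struct sk_buff_head rxq;
 *	skb_queue_head_init(&rxq);
 *	skb_queue_tail(&rxq, skb);                 (producer side)
 *	while ((skb = skb_dequeue(&rxq)) != NULL)  (consumer side)
 *		netif_rx(skb);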
- */ -void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_after(list, old, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet before a given packet in a list. The list locks are - * taken and this function is atomic with respect to other list locked - * calls. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_insert(newsk, old->prev, old, list); - spin_unlock_irqrestore(&list->lock, flags); -} - -static inline void skb_split_inside_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, const int pos) -{ - int i; - - skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), - pos - len); - /* And move data appendix as is. */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - skb1->data_len = skb->data_len; - skb1->len += skb1->data_len; - skb->data_len = 0; - skb->len = len; - skb_set_tail_pointer(skb, len); -} - -static inline void skb_split_no_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, int pos) -{ - int i, k = 0; - const int nfrags = skb_shinfo(skb)->nr_frags; - - skb_shinfo(skb)->nr_frags = 0; - skb1->len = skb1->data_len = skb->len - len; - skb->len = len; - skb->data_len = len - pos; - - for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; - - if (pos + size > len) { - skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; - - if (pos < len) { - /* Split frag. - * We have two variants in this case: - * 1. Move all the frag to the second - * part, if it is possible. F.e. - * this approach is mandatory for TUX, - * where splitting is expensive. - * 2. Split accurately. This is what we do. - */ - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; - skb_shinfo(skb)->nr_frags++; - } - k++; - } else - skb_shinfo(skb)->nr_frags++; - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -} - -/** - * skb_split - Split fragmented skb to two parts at length len. - * @skb: the buffer to split - * @skb1: the buffer to receive the second part - * @len: new length for skb - */ -void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) -{ - int pos = skb_headlen(skb); - - if (len < pos) /* Split line is inside header. */ - skb_split_inside_header(skb, skb1, len, pos); - else /* Second chunk has no header, nothing to copy. */ - skb_split_no_header(skb, skb1, len, pos); -} - -/* Shifting from/to a cloned skb is a no-go. - * - * Caller cannot keep skb_shinfo related pointers past calling here!
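 - * - * (Editorial note: pskb_expand_head() reallocates the data area together - * with the shared-info block, so any cached skb_shinfo() frag pointer - * goes stale; skb_shift() below therefore re-reads fragfrom/fragto after - * every successful skb_prepare_for_shift().)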
- */ -static int skb_prepare_for_shift(struct sk_buff *skb) -{ - return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); -} - -/** - * skb_shift - Shifts paged data partially from skb to another - * @tgt: buffer into which tail data gets added - * @skb: buffer from which the paged data comes from - * @shiftlen: shift up to this many bytes - * - * Attempts to shift up to shiftlen worth of bytes, which may be less than - * the length of the skb, from @skb to @tgt. Returns the number of bytes - * shifted. It's up to the caller to free @skb if everything was shifted. - * - * If @tgt runs out of frags, the whole operation is aborted. - * - * Skb cannot include anything else but paged data while tgt is allowed - * to have non-paged data as well. - * - * TODO: full sized shift could be optimized but that would need - * specialized skb free'er to handle frags without up-to-date nr_frags. - */ -int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) -{ - int from, to, merge, todo; - struct skb_frag_struct *fragfrom, *fragto; - - BUG_ON(shiftlen > skb->len); - BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ - - todo = shiftlen; - from = 0; - to = skb_shinfo(tgt)->nr_frags; - fragfrom = &skb_shinfo(skb)->frags[from]; - - /* Actual merge is delayed until the point when we know we can - * commit all, so that we don't have to undo partial changes - */ - if (!to || - !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { - merge = -1; - } else { - merge = to - 1; - - todo -= fragfrom->size; - if (todo < 0) { - if (skb_prepare_for_shift(skb) || - skb_prepare_for_shift(tgt)) - return 0; - - /* All previous frag pointers might be stale! */ - fragfrom = &skb_shinfo(skb)->frags[from]; - fragto = &skb_shinfo(tgt)->frags[merge]; - - fragto->size += shiftlen; - fragfrom->size -= shiftlen; - fragfrom->page_offset += shiftlen; - - goto onlymerged; - } - - from++; - } - - /* Skip full, not-fitting skb to avoid expensive operations */ - if ((shiftlen == skb->len) && - (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) - return 0; - - if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) - return 0; - - while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { - if (to == MAX_SKB_FRAGS) - return 0; - - fragfrom = &skb_shinfo(skb)->frags[from]; - fragto = &skb_shinfo(tgt)->frags[to]; - - if (todo >= fragfrom->size) { - *fragto = *fragfrom; - todo -= fragfrom->size; - from++; - to++; - - } else { - get_page(fragfrom->page); - fragto->page = fragfrom->page; - fragto->page_offset = fragfrom->page_offset; - fragto->size = todo; - - fragfrom->page_offset += todo; - fragfrom->size -= todo; - todo = 0; - - to++; - break; - } - } - - /* Ready to "commit" this state change to tgt */ - skb_shinfo(tgt)->nr_frags = to; - - if (merge >= 0) { - fragfrom = &skb_shinfo(skb)->frags[0]; - fragto = &skb_shinfo(tgt)->frags[merge]; - - fragto->size += fragfrom->size; - put_page(fragfrom->page); - } - - /* Reposition in the original skb */ - to = 0; - while (from < skb_shinfo(skb)->nr_frags) - skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; - skb_shinfo(skb)->nr_frags = to; - - BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); - -onlymerged: - /* Most likely the tgt won't ever need its checksum anymore, skb on - * the other hand might need it if it needs to be resent - */ - tgt->ip_summed = CHECKSUM_PARTIAL; - skb->ip_summed = CHECKSUM_PARTIAL; - - /* Yak, is it really working this way? Some helper please?
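 - * - * (Editorial gloss: shiftlen bytes of paged data have just moved from - * @skb to @tgt, so len, data_len and truesize of both buffers are fixed - * up by hand below; no dedicated helper for this bookkeeping existed in - * this tree.)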
*/ - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; - - return shiftlen; -} - -/** - * skb_prepare_seq_read - Prepare a sequential read of skb data - * @skb: the buffer to read - * @from: lower offset of data to be read - * @to: upper offset of data to be read - * @st: state variable - * - * Initializes the specified state variable. Must be called before - * invoking skb_seq_read() for the first time. - */ -void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, - unsigned int to, struct skb_seq_state *st) -{ - st->lower_offset = from; - st->upper_offset = to; - st->root_skb = st->cur_skb = skb; - st->frag_idx = st->stepped_offset = 0; - st->frag_data = NULL; -} - -/** - * skb_seq_read - Sequentially read skb data - * @consumed: number of bytes consumed by the caller so far - * @data: destination pointer for data to be returned - * @st: state variable - * - * Reads a block of skb data at &consumed relative to the - * lower offset specified to skb_prepare_seq_read(). Assigns - * the head of the data block to &data and returns the length - * of the block or 0 if the end of the skb data or the upper - * offset has been reached. - * - * The caller is not required to consume all of the data - * returned, i.e. &consumed is typically set to the number - * of bytes already consumed and the next call to - * skb_seq_read() will return the remaining part of the block. - * - * Note 1: The size of each block of data returned can be arbitrary; - * this limitation is the cost of zerocopy sequential - * reads of potentially non-linear data. - * - * Note 2: Fragment lists within fragments are not implemented - * at the moment, state->root_skb could be replaced with - * a stack for this purpose. - */ -unsigned int skb_seq_read(unsigned int consumed, const u8 **data, - struct skb_seq_state *st) -{ - unsigned int block_limit, abs_offset = consumed + st->lower_offset; - skb_frag_t *frag; - - if (unlikely(abs_offset >= st->upper_offset)) - return 0; - -next_skb: - block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - - if (abs_offset < block_limit) { - *data = st->cur_skb->data + (abs_offset - st->stepped_offset); - return block_limit - abs_offset; - } - - if (st->frag_idx == 0 && !st->frag_data) - st->stepped_offset += skb_headlen(st->cur_skb); - - while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { - frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = frag->size + st->stepped_offset; - - if (abs_offset < block_limit) { - if (!st->frag_data) - st->frag_data = kmap_skb_frag(frag); - - *data = (u8 *) st->frag_data + frag->page_offset + - (abs_offset - st->stepped_offset); - - return block_limit - abs_offset; - } - - if (st->frag_data) { - kunmap_skb_frag(st->frag_data); - st->frag_data = NULL; - } - - st->frag_idx++; - st->stepped_offset += frag->size; - } - - if (st->frag_data) { - kunmap_skb_frag(st->frag_data); - st->frag_data = NULL; - } - - if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { - st->cur_skb = skb_shinfo(st->root_skb)->frag_list; - st->frag_idx = 0; - goto next_skb; - } else if (st->cur_skb->next) { - st->cur_skb = st->cur_skb->next; - st->frag_idx = 0; - goto next_skb; - } - - return 0; -} - -/** - * skb_abort_seq_read - Abort a sequential read of skb data - * @st: state variable - * - * Must be called if the read was abandoned before skb_seq_read() - * returned 0.
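 - * - * Typical read loop (an editorial sketch, not from the original source; - * error handling elided): - * - *	struct skb_seq_state st; - *	const u8 *data; - *	unsigned int len, consumed = 0; - * - *	skb_prepare_seq_read(skb, 0, skb->len, &st); - *	while ((len = skb_seq_read(consumed, &data, &st)) != 0) - *		consumed += len;	(process len bytes at data here) - *	skb_abort_seq_read(&st);	(needed only on early exit)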
- */ -void skb_abort_seq_read(struct skb_seq_state *st) -{ - if (st->frag_data) - kunmap_skb_frag(st->frag_data); -} - -#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) - -static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, - struct ts_config *conf, - struct ts_state *state) -{ - return skb_seq_read(offset, text, TS_SKB_CB(state)); -} - -static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) -{ - skb_abort_seq_read(TS_SKB_CB(state)); -} - -/** - * skb_find_text - Find a text pattern in skb data - * @skb: the buffer to look in - * @from: search offset - * @to: search limit - * @config: textsearch configuration - * @state: uninitialized textsearch state variable - * - * Finds a pattern in the skb data according to the specified - * textsearch configuration. Use textsearch_next() to retrieve - * subsequent occurrences of the pattern. Returns the offset - * to the first occurrence or UINT_MAX if no match was found. - */ -unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state) -{ - unsigned int ret; - - config->get_next_block = skb_ts_get_next_block; - config->finish = skb_ts_finish; - - skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); - - ret = textsearch_find(config, state); - return (ret <= to - from ? ret : UINT_MAX); -} - -/** - * skb_append_datato_frags - append the user data to a skb - * @sk: sock structure - * @skb: skb structure to be appended with user data. - * @getfrag: callback function used to fetch the user data - * @from: pointer to user message iov - * @length: length of the iov message - * - * Description: This procedure appends the user data in the fragment part - * of the skb. If any page allocation fails, it returns -ENOMEM. - */ -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int (*getfrag)(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length) -{ - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; - int offset = 0; - int ret; - - do { - /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; - - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); - - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) - return -ENOMEM; - - /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)?
left : length; - - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), - offset, copy, 0, skb); - if (ret < 0) - return -EFAULT; - - /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; - frag->size += copy; - skb->len += copy; - skb->data_len += copy; - offset += copy; - length -= copy; - - } while (length > 0); - - return 0; -} - -/** - * skb_pull_rcsum - pull skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_pull on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_pull unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. - */ -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) -{ - BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; -} - -EXPORT_SYMBOL_GPL(skb_pull_rcsum); - -/** - * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * - * This function performs segmentation on the given skb. It returns - * a pointer to the first in a list of new skbs for the segments. - * In case of error it returns ERR_PTR(err). - */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = NULL; - struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); - unsigned int offset = doffset; - unsigned int headroom; - unsigned int len; - int sg = features & NETIF_F_SG; - int nfrags = skb_shinfo(skb)->nr_frags; - int err = -ENOMEM; - int i = 0; - int pos; - - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); - - do { - struct sk_buff *nskb; - skb_frag_t *frag; - int hsize; - int size; - - len = skb->len - offset; - if (len > mss) - len = mss; - - hsize = skb_headlen(skb) - offset; - if (hsize < 0) - hsize = 0; - if (hsize > len || !sg) - hsize = len; - - if (!hsize && i >= nfrags) { - BUG_ON(fskb->len != len); - - pos += len; - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; - - if (unlikely(!nskb)) - goto err; - - hsize = skb_end_pointer(nskb) - nskb->head; - if (skb_cow_head(nskb, doffset + headroom)) { - kfree_skb(nskb); - goto err; - } - - nskb->truesize += skb_end_pointer(nskb) - nskb->head - - hsize; - skb_release_head_state(nskb); - __skb_push(nskb, doffset); - } else { - nskb = alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC); - - if (unlikely(!nskb)) - goto err; - - skb_reserve(nskb, headroom); - __skb_put(nskb, doffset); - } - - if (segs) - tail->next = nskb; - else - segs = nskb; - tail = nskb; - - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb->mac_len); - nskb->transport_header = (nskb->network_header + - skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); - - if (pos >= offset + len) - continue; - - if (!sg) { - nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, - skb_put(nskb, len), - len, 0); - continue; - } - - frag = skb_shinfo(nskb)->frags; - - skb_copy_from_linear_data_offset(skb, offset, - skb_put(nskb, hsize), hsize); - - while (pos < 
offset + len && i < nfrags) { - *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); - size = frag->size; - - if (pos < offset) { - frag->page_offset += offset - pos; - frag->size -= offset - pos; - } - - skb_shinfo(nskb)->nr_frags++; - - if (pos + size <= offset + len) { - i++; - pos += size; - } else { - frag->size -= pos + size - (offset + len); - goto skip_fraglist; - } - - frag++; - } - - if (pos < offset + len) { - struct sk_buff *fskb2 = fskb; - - BUG_ON(pos + fskb->len != offset + len); - - pos += fskb->len; - fskb = fskb->next; - - if (fskb2->next) { - fskb2 = skb_clone(fskb2, GFP_ATOMIC); - if (!fskb2) - goto err; - } else - skb_get(fskb2); - - BUG_ON(skb_shinfo(nskb)->frag_list); - skb_shinfo(nskb)->frag_list = fskb2; - } - -skip_fraglist: - nskb->data_len = len - hsize; - nskb->len += nskb->data_len; - nskb->truesize += nskb->data_len; - } while ((offset += len) < skb->len); - - return segs; - -err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } - return ERR_PTR(err); -} - -EXPORT_SYMBOL_GPL(skb_segment); - -int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff *p = *head; - struct sk_buff *nskb; - unsigned int headroom; - unsigned int hlen = p->data - skb_mac_header(p); - unsigned int len = skb->len; - - if (hlen + p->len + len >= 65536) - return -E2BIG; - - if (skb_shinfo(p)->frag_list) - goto merge; - else if (!skb_headlen(p) && !skb_headlen(skb) && - skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < - MAX_SKB_FRAGS) { - memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, - skb_shinfo(skb)->frags, - skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); - - skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - - skb->truesize -= skb->data_len; - skb->len -= skb->data_len; - skb->data_len = 0; - - NAPI_GRO_CB(skb)->free = 1; - goto done; - } - - headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom); - if (unlikely(!nskb)) - return -ENOMEM; - - __copy_skb_header(nskb, p); - nskb->mac_len = p->mac_len; - - skb_reserve(nskb, headroom); - - skb_set_mac_header(nskb, -hlen); - skb_set_network_header(nskb, skb_network_offset(p)); - skb_set_transport_header(nskb, skb_transport_offset(p)); - - memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); - - *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); - skb_shinfo(nskb)->frag_list = p; - skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; - skb_header_release(p); - nskb->prev = p; - - nskb->data_len += p->len; - nskb->truesize += p->len; - nskb->len += p->len; - - *head = nskb; - nskb->next = p->next; - p->next = NULL; - - p = nskb; - -merge: - p->prev->next = skb; - p->prev = skb; - skb_header_release(skb); - -done: - NAPI_GRO_CB(p)->count++; - p->data_len += len; - p->truesize += len; - p->len += len; - - NAPI_GRO_CB(skb)->same_flow = 1; - return 0; -} -EXPORT_SYMBOL_GPL(skb_gro_receive); - -void __init skb_init(void) -{ - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", - (2*sizeof(struct sk_buff)) + - sizeof(atomic_t), - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); -} - -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified 
scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. - */ -static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int elt = 0; - - if (copy > 0) { - if (copy > len) - copy = len; - sg_set_buf(sg, skb->data + offset, copy); - elt++; - if ((len -= copy) == 0) - return elt; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - sg_set_page(&sg[elt], frag->page, copy, - frag->page_offset+offset-start); - elt++; - if (!(len -= copy)) - return elt; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - elt += __skb_to_sgvec(list, sg+elt, offset - start, - copy); - if ((len -= copy) == 0) - return elt; - offset += copy; - } - start = end; - } - } - BUG_ON(len); - return elt; -} - -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} - -/** - * skb_cow_data - Check that a socket buffer's data buffers are writable - * @skb: The socket buffer to check. - * @tailbits: Amount of trailing space to be added - * @trailer: Returned pointer to the skb where the @tailbits space begins - * - * Make sure that the data buffers attached to a socket buffer are - * writable. If they are not, private copies are made of the data buffers - * and the socket buffer is set to use these instead. - * - * If @tailbits is given, make sure that there is space to write @tailbits - * bytes of data beyond current end of socket buffer. @trailer will be - * set to point to the skb in which this space begins. - * - * The number of scatterlist elements required to completely map the - * COW'd and extended socket buffer will be returned. - */ -int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) -{ - int copyflag; - int elt; - struct sk_buff *skb1, **skb_p; - - /* If skb is cloned or its head is paged, reallocate - * head pulling out all the pages (pages are considered not writable - * at the moment even if they are anonymous). - */ - if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) - return -ENOMEM; - - /* Easy case. Most of packets will go this way. */ - if (!skb_shinfo(skb)->frag_list) { - /* A little of trouble, not enough of space for trailer. - * This should not happen, when stack is tuned to generate - * good frames. OK, on miss we reallocate and reserve even more - * space, 128 bytes is fair. */ - - if (skb_tailroom(skb) < tailbits && - pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) - return -ENOMEM; - - /* Voila! */ - *trailer = skb; - return 1; - } - - /* Misery. We are in troubles, going to mincer fragments... */ - - elt = 1; - skb_p = &skb_shinfo(skb)->frag_list; - copyflag = 0; - - while ((skb1 = *skb_p) != NULL) { - int ntail = 0; - - /* The fragment is partially pulled by someone, - * this can happen on input. 
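(Editorial note: a shared fragment must not be - * written in place, hence the copy made below.)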
Copy it and everything - * after it. */ - - if (skb_shared(skb1)) - copyflag = 1; - - /* If the skb is the last, worry about trailer. */ - - if (skb1->next == NULL && tailbits) { - if (skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list || - skb_tailroom(skb1) < tailbits) - ntail = tailbits + 128; - } - - if (copyflag || - skb_cloned(skb1) || - ntail || - skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list) { - struct sk_buff *skb2; - - /* Fuck, we are miserable poor guys... */ - if (ntail == 0) - skb2 = skb_copy(skb1, GFP_ATOMIC); - else - skb2 = skb_copy_expand(skb1, - skb_headroom(skb1), - ntail, - GFP_ATOMIC); - if (unlikely(skb2 == NULL)) - return -ENOMEM; - - if (skb1->sk) - skb_set_owner_w(skb2, skb1->sk); - - /* Looking around. Are we still alive? - * OK, link new skb, drop old one */ - - skb2->next = skb1->next; - *skb_p = skb2; - kfree_skb(skb1); - skb1 = skb2; - } - elt++; - *trailer = skb1; - skb_p = &skb1->next; - } - - return elt; -} - -/** - * skb_partial_csum_set - set up and verify partial csum values for packet - * @skb: the skb to set - * @start: the number of bytes after skb->data to start checksumming. - * @off: the offset from start to place the checksum. - * - * For untrusted partially-checksummed packets, we need to make sure the values - * for skb->csum_start and skb->csum_offset are valid so we don't oops. - * - * This function checks and sets those values and skb->ip_summed: if this - * returns false you should drop the packet. - */ -bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) -{ - if (unlikely(start > skb->len - 2) || - unlikely((int)start + off > skb->len - 2)) { - if (net_ratelimit()) - printk(KERN_WARNING - "bad partial csum: csum=%u/%u len=%u\n", - start, off, skb->len); - return false; - } - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; - skb->csum_offset = off; - return true; -} - -void __skb_warn_lro_forwarding(const struct sk_buff *skb) -{ - if (net_ratelimit()) - pr_warning("%s: received packets cannot be forwarded" - " while LRO is enabled\n", skb->dev->name); -} - -EXPORT_SYMBOL(___pskb_trim); -EXPORT_SYMBOL(__kfree_skb); -EXPORT_SYMBOL(kfree_skb); -EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(__alloc_skb); -EXPORT_SYMBOL(__netdev_alloc_skb); -EXPORT_SYMBOL(pskb_copy); -EXPORT_SYMBOL(pskb_expand_head); -EXPORT_SYMBOL(skb_checksum); -EXPORT_SYMBOL(skb_clone); -EXPORT_SYMBOL(skb_copy); -EXPORT_SYMBOL(skb_copy_and_csum_bits); -EXPORT_SYMBOL(skb_copy_and_csum_dev); -EXPORT_SYMBOL(skb_copy_bits); -EXPORT_SYMBOL(skb_copy_expand); -EXPORT_SYMBOL(skb_over_panic); -EXPORT_SYMBOL(skb_pad); -EXPORT_SYMBOL(skb_realloc_headroom); -EXPORT_SYMBOL(skb_under_panic); -EXPORT_SYMBOL(skb_dequeue); -EXPORT_SYMBOL(skb_dequeue_tail); -EXPORT_SYMBOL(skb_insert); -EXPORT_SYMBOL(skb_queue_purge); -EXPORT_SYMBOL(skb_queue_head); -EXPORT_SYMBOL(skb_queue_tail); -EXPORT_SYMBOL(skb_unlink); -EXPORT_SYMBOL(skb_append); -EXPORT_SYMBOL(skb_split); -EXPORT_SYMBOL(skb_prepare_seq_read); -EXPORT_SYMBOL(skb_seq_read); -EXPORT_SYMBOL(skb_abort_seq_read); -EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_append_datato_frags); -EXPORT_SYMBOL(__skb_warn_lro_forwarding); - -EXPORT_SYMBOL_GPL(skb_to_sgvec); -EXPORT_SYMBOL_GPL(skb_cow_data); -EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/libdde_linux26/lib/src/net/core/utils.c b/libdde_linux26/lib/src/net/core/utils.c deleted file mode 100644 index 5d10a675..00000000 --- a/libdde_linux26/lib/src/net/core/utils.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Generic 
address resolution entity - * - * Authors: - * net_random Alan Cox - * net_ratelimit Andi Kleen - * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project - * - * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/inet.h> -#include <linux/mm.h> -#include <linux/net.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/random.h> -#include <linux/percpu.h> -#include <linux/init.h> -#include <net/sock.h> - -#include <asm/byteorder.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#ifndef DDE_LINUX -int net_msg_cost __read_mostly = 5*HZ; -DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); -#else -int net_msg_cost = 500; -#endif /* DDE_LINUX */ -int net_msg_burst __read_mostly = 10; -int net_msg_warn __read_mostly = 1; -EXPORT_SYMBOL(net_msg_warn); - -/* - * All net warning printk()s should be guarded by this function. - */ -int net_ratelimit(void) -{ -#ifndef DDE_LINUX - return __ratelimit(&net_ratelimit_state); -#else - return 0; -#endif -} -EXPORT_SYMBOL(net_ratelimit); - -/* - * Convert an ASCII string to binary IP. - * This is outside of net/ipv4/ because various code that uses IP addresses - * is otherwise not dependent on the TCP/IP stack. - */ - -__be32 in_aton(const char *str) -{ - unsigned long l; - unsigned int val; - int i; - - l = 0; - for (i = 0; i < 4; i++) - { - l <<= 8; - if (*str != '\0') - { - val = 0; - while (*str != '\0' && *str != '.' && *str != '\n') - { - val *= 10; - val += *str - '0'; - str++; - } - l |= val; - if (*str != '\0') - str++; - } - } - return(htonl(l)); -} - -EXPORT_SYMBOL(in_aton); - -#define IN6PTON_XDIGIT 0x00010000 -#define IN6PTON_DIGIT 0x00020000 -#define IN6PTON_COLON_MASK 0x00700000 -#define IN6PTON_COLON_1 0x00100000 /* single : requested */ -#define IN6PTON_COLON_2 0x00200000 /* second : requested */ -#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ -#define IN6PTON_DOT 0x00800000 /* . */ -#define IN6PTON_DELIM 0x10000000 -#define IN6PTON_NULL 0x20000000 /* first/tail */ -#define IN6PTON_UNKNOWN 0x40000000 - -static inline int xdigit2bin(char c, int delim) -{ - if (c == delim || c == '\0') - return IN6PTON_DELIM; - if (c == ':') - return IN6PTON_COLON_MASK; - if (c == '.') - return IN6PTON_DOT; - if (c >= '0' && c <= '9') - return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); - if (c >= 'a' && c <= 'f') - return (IN6PTON_XDIGIT | (c - 'a' + 10)); - if (c >= 'A' && c <= 'F') - return (IN6PTON_XDIGIT | (c - 'A' + 10)); - if (delim == -1) - return IN6PTON_DELIM; - return IN6PTON_UNKNOWN; -} - -int in4_pton(const char *src, int srclen, - u8 *dst, - int delim, const char **end) -{ - const char *s; - u8 *d; - u8 dbuf[4]; - int ret = 0; - int i; - int w = 0; - - if (srclen < 0) - srclen = strlen(src); - s = src; - d = dbuf; - i = 0; - while(1) { - int c; - c = xdigit2bin(srclen > 0 ?
*s : '\0', delim); - if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { - goto out; - } - if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { - if (w == 0) - goto out; - *d++ = w & 0xff; - w = 0; - i++; - if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { - if (i != 4) - goto out; - break; - } - goto cont; - } - w = (w * 10) + c; - if ((w & 0xffff) > 255) { - goto out; - } -cont: - if (i >= 4) - goto out; - s++; - srclen--; - } - ret = 1; - memcpy(dst, dbuf, sizeof(dbuf)); -out: - if (end) - *end = s; - return ret; -} - -EXPORT_SYMBOL(in4_pton); - -int in6_pton(const char *src, int srclen, - u8 *dst, - int delim, const char **end) -{ - const char *s, *tok = NULL; - u8 *d, *dc = NULL; - u8 dbuf[16]; - int ret = 0; - int i; - int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; - int w = 0; - - memset(dbuf, 0, sizeof(dbuf)); - - s = src; - d = dbuf; - if (srclen < 0) - srclen = strlen(src); - - while (1) { - int c; - - c = xdigit2bin(srclen > 0 ? *s : '\0', delim); - if (!(c & state)) - goto out; - if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { - /* process one 16-bit word */ - if (!(state & IN6PTON_NULL)) { - *d++ = (w >> 8) & 0xff; - *d++ = w & 0xff; - } - w = 0; - if (c & IN6PTON_DELIM) { - /* We've processed last word */ - break; - } - /* - * COLON_1 => XDIGIT - * COLON_2 => XDIGIT|DELIM - * COLON_1_2 => COLON_2 - */ - switch (state & IN6PTON_COLON_MASK) { - case IN6PTON_COLON_2: - dc = d; - state = IN6PTON_XDIGIT | IN6PTON_DELIM; - if (dc - dbuf >= sizeof(dbuf)) - state |= IN6PTON_NULL; - break; - case IN6PTON_COLON_1|IN6PTON_COLON_1_2: - state = IN6PTON_XDIGIT | IN6PTON_COLON_2; - break; - case IN6PTON_COLON_1: - state = IN6PTON_XDIGIT; - break; - case IN6PTON_COLON_1_2: - state = IN6PTON_COLON_2; - break; - default: - state = 0; - } - tok = s + 1; - goto cont; - } - - if (c & IN6PTON_DOT) { - ret = in4_pton(tok ? 
tok : s, srclen + (int)(s - tok), d, delim, &s); - if (ret > 0) { - d += 4; - break; - } - goto out; - } - - w = (w << 4) | (0xff & c); - state = IN6PTON_COLON_1 | IN6PTON_DELIM; - if (!(w & 0xf000)) { - state |= IN6PTON_XDIGIT; - } - if (!dc && d + 2 < dbuf + sizeof(dbuf)) { - state |= IN6PTON_COLON_1_2; - state &= ~IN6PTON_DELIM; - } - if (d + 2 >= dbuf + sizeof(dbuf)) { - state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); - } -cont: - if ((dc && d + 4 < dbuf + sizeof(dbuf)) || - d + 4 == dbuf + sizeof(dbuf)) { - state |= IN6PTON_DOT; - } - if (d >= dbuf + sizeof(dbuf)) { - state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); - } - s++; - srclen--; - } - - i = 15; d--; - - if (dc) { - while(d >= dc) - dst[i--] = *d--; - while(i >= dc - dbuf) - dst[i--] = 0; - while(i >= 0) - dst[i--] = *d--; - } else - memcpy(dst, dbuf, sizeof(dbuf)); - - ret = 1; -out: - if (end) - *end = s; - return ret; -} - -EXPORT_SYMBOL(in6_pton); - -void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr) -{ - __be32 diff[] = { ~from, to }; - if (skb->ip_summed != CHECKSUM_PARTIAL) { - *sum = csum_fold(csum_partial(diff, sizeof(diff), - ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial(diff, sizeof(diff), - ~skb->csum); - } else if (pseudohdr) - *sum = ~csum_fold(csum_partial(diff, sizeof(diff), - csum_unfold(*sum))); -} -EXPORT_SYMBOL(inet_proto_csum_replace4); diff --git a/libdde_linux26/lib/src/net/netlink/af_netlink.c b/libdde_linux26/lib/src/net/netlink/af_netlink.c deleted file mode 100644 index 3f00a014..00000000 --- a/libdde_linux26/lib/src/net/netlink/af_netlink.c +++ /dev/null @@ -1,2013 +0,0 @@ -/* - * NETLINK Kernel-user communication protocol. - * - * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> - * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith - * added netlink_proto_exit - * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. 
de Melo <acme@conectiva.com.br> - * use nlk_sk, as sk->protinfo is on a diet 8) - * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> - * - inc module use count of module that owns - * the kernel socket in case userspace opens - * socket of same protocol - * - remove all module support, since netlink is - * mandatory if CONFIG_NET=y these days - */ - -#include <linux/module.h> - -#include <linux/capability.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/un.h> -#include <linux/fcntl.h> -#include <linux/termios.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <asm/uaccess.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/notifier.h> -#include <linux/security.h> -#include <linux/jhash.h> -#include <linux/jiffies.h> -#include <linux/random.h> -#include <linux/bitops.h> -#include <linux/mm.h> -#include <linux/types.h> -#include <linux/audit.h> -#include <linux/mutex.h> - -#include <net/net_namespace.h> -#include <net/sock.h> -#include <net/scm.h> -#include <net/netlink.h> - -#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) -#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) - -struct netlink_sock { - /* struct sock has to be the first member of netlink_sock */ - struct sock sk; - u32 pid; - u32 dst_pid; - u32 dst_group; - u32 flags; - u32 subscriptions; - u32 ngroups; - unsigned long *groups; - unsigned long state; - wait_queue_head_t wait; - struct netlink_callback *cb; - struct mutex *cb_mutex; - struct mutex cb_def_mutex; - void (*netlink_rcv)(struct sk_buff *skb); - struct module *module; -}; - -#define NETLINK_KERNEL_SOCKET 0x1 -#define NETLINK_RECV_PKTINFO 0x2 - -static inline struct netlink_sock *nlk_sk(struct sock *sk) -{ - return container_of(sk, struct netlink_sock, sk); -} - -static inline int netlink_is_kernel(struct sock *sk) -{ - return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; -} - -struct nl_pid_hash { - struct hlist_head *table; - unsigned long rehash_time; - - unsigned int mask; - unsigned int shift; - - unsigned int entries; - unsigned int max_shift; - - u32 rnd; -}; - -struct netlink_table { - struct nl_pid_hash hash; - struct hlist_head mc_list; - unsigned long *listeners; - unsigned int nl_nonroot; - unsigned int groups; - struct mutex *cb_mutex; - struct module *module; - int registered; -}; - -static struct netlink_table *nl_table; - -static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); - -static int netlink_dump(struct sock *sk); -static void netlink_destroy_callback(struct netlink_callback *cb); - -static DEFINE_RWLOCK(nl_table_lock); -static atomic_t nl_table_users = ATOMIC_INIT(0); - -static ATOMIC_NOTIFIER_HEAD(netlink_chain); - -static u32 netlink_group_mask(u32 group) -{ - return group ? 
1 << (group - 1) : 0; -} - -static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) -{ - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; -} - -static void netlink_sock_destruct(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb) { - if (nlk->cb->done) - nlk->cb->done(nlk->cb); - netlink_destroy_callback(nlk->cb); - } - - skb_queue_purge(&sk->sk_receive_queue); - - if (!sock_flag(sk, SOCK_DEAD)) { - printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); - return; - } - - WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); - WARN_ON(nlk_sk(sk)->groups); -} - -/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on - * SMP. Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines. - */ - -static void netlink_table_grab(void) - __acquires(nl_table_lock) -{ - write_lock_irq(&nl_table_lock); - - if (atomic_read(&nl_table_users)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&nl_table_wait, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&nl_table_users) == 0) - break; - write_unlock_irq(&nl_table_lock); - schedule(); - write_lock_irq(&nl_table_lock); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nl_table_wait, &wait); - } -} - -static void netlink_table_ungrab(void) - __releases(nl_table_lock) -{ - write_unlock_irq(&nl_table_lock); - wake_up(&nl_table_wait); -} - -static inline void -netlink_lock_table(void) -{ - /* read_lock() synchronizes us to netlink_table_grab */ - - read_lock(&nl_table_lock); - atomic_inc(&nl_table_users); - read_unlock(&nl_table_lock); -} - -static inline void -netlink_unlock_table(void) -{ - if (atomic_dec_and_test(&nl_table_users)) - wake_up(&nl_table_wait); -} - -static inline struct sock *netlink_lookup(struct net *net, int protocol, - u32 pid) -{ - struct nl_pid_hash *hash = &nl_table[protocol].hash; - struct hlist_head *head; - struct sock *sk; - struct hlist_node *node; - - read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); - sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { - sock_hold(sk); - goto found; - } - } - sk = NULL; -found: - read_unlock(&nl_table_lock); - return sk; -} - -static inline struct hlist_head *nl_pid_hash_zalloc(size_t size) -{ - if (size <= PAGE_SIZE) - return kzalloc(size, GFP_ATOMIC); - else - return (struct hlist_head *) - __get_free_pages(GFP_ATOMIC | __GFP_ZERO, - get_order(size)); -} - -static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) -{ - if (size <= PAGE_SIZE) - kfree(table); - else - free_pages((unsigned long)table, get_order(size)); -} - -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) -{ - unsigned int omask, mask, shift; - size_t osize, size; - struct hlist_head *otable, *table; - int i; - - omask = mask = hash->mask; - osize = size = (mask + 1) * sizeof(*table); - shift = hash->shift; - - if (grow) { - if (++shift > hash->max_shift) - return 0; - mask = mask * 2 + 1; - size *= 2; - } - - table = nl_pid_hash_zalloc(size); - if (!table) - return 0; - - otable = hash->table; - hash->table = table; - hash->mask = mask; - hash->shift = shift; - get_random_bytes(&hash->rnd, sizeof(hash->rnd)); - - for (i = 0; i <= omask; i++) { - struct sock *sk; - struct hlist_node *node, *tmp; - - 
sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); - } - - nl_pid_hash_free(otable, osize); - hash->rehash_time = jiffies + 10 * 60 * HZ; - return 1; -} - -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) -{ - int avg = hash->entries >> hash->shift; - - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) - return 1; - - if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); - return 1; - } - - return 0; -} - -static const struct proto_ops netlink_ops; - -static void -netlink_update_listeners(struct sock *sk) -{ - struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - struct hlist_node *node; - unsigned long mask; - unsigned int i; - - for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { - mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) { - if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) - mask |= nlk_sk(sk)->groups[i]; - } - tbl->listeners[i] = mask; - } - /* this function is only called with the netlink table "grabbed", which - * makes sure updates are visible before bind or setsockopt return. */ -} - -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) -{ - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; - struct hlist_head *head; - int err = -EADDRINUSE; - struct sock *osk; - struct hlist_node *node; - int len; - - netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); - len = 0; - sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) - break; - len++; - } - if (node) - goto err; - - err = -EBUSY; - if (nlk_sk(sk)->pid) - goto err; - - err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) - goto err; - - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); - hash->entries++; - nlk_sk(sk)->pid = pid; - sk_add_node(sk, head); - err = 0; - -err: - netlink_table_ungrab(); - return err; -} - -static void netlink_remove(struct sock *sk) -{ - netlink_table_grab(); - if (sk_del_node_init(sk)) - nl_table[sk->sk_protocol].hash.entries--; - if (nlk_sk(sk)->subscriptions) - __sk_del_bind_node(sk); - netlink_table_ungrab(); -} - -static struct proto netlink_proto = { - .name = "NETLINK", - .owner = THIS_MODULE, - .obj_size = sizeof(struct netlink_sock), -}; - -static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol) -{ - struct sock *sk; - struct netlink_sock *nlk; - - sock->ops = &netlink_ops; - - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); - if (!sk) - return -ENOMEM; - - sock_init_data(sock, sk); - - nlk = nlk_sk(sk); - if (cb_mutex) - nlk->cb_mutex = cb_mutex; - else { - nlk->cb_mutex = &nlk->cb_def_mutex; - mutex_init(nlk->cb_mutex); - } - init_waitqueue_head(&nlk->wait); - - sk->sk_destruct = netlink_sock_destruct; - sk->sk_protocol = protocol; - return 0; -} - -static int netlink_create(struct net *net, struct socket *sock, int protocol) -{ - struct module *module = NULL; - struct mutex *cb_mutex; - struct netlink_sock *nlk; - int err = 0; - - sock->state = SS_UNCONNECTED; - - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) - return -ESOCKTNOSUPPORT; - - if (protocol < 0 || protocol >= MAX_LINKS) - return -EPROTONOSUPPORT; - - netlink_lock_table(); -#ifdef CONFIG_MODULES - if (!nl_table[protocol].registered) { - netlink_unlock_table(); - request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); - netlink_lock_table(); - } -#endif - if (nl_table[protocol].registered && - 
try_module_get(nl_table[protocol].module)) - module = nl_table[protocol].module; - cb_mutex = nl_table[protocol].cb_mutex; - netlink_unlock_table(); - - err = __netlink_create(net, sock, cb_mutex, protocol); - if (err < 0) - goto out_module; - - local_bh_disable(); - sock_prot_inuse_add(net, &netlink_proto, 1); - local_bh_enable(); - - nlk = nlk_sk(sock->sk); - nlk->module = module; -out: - return err; - -out_module: - module_put(module); - goto out; -} - -static int netlink_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk; - - if (!sk) - return 0; - - netlink_remove(sk); - sock_orphan(sk); - nlk = nlk_sk(sk); - - /* - * OK. Socket is unlinked, any packets that arrive now - * will be purged. - */ - - sock->sk = NULL; - wake_up_interruptible_all(&nlk->wait); - - skb_queue_purge(&sk->sk_write_queue); - - if (nlk->pid && !nlk->subscriptions) { - struct netlink_notify n = { - .net = sock_net(sk), - .protocol = sk->sk_protocol, - .pid = nlk->pid, - }; - atomic_notifier_call_chain(&netlink_chain, - NETLINK_URELEASE, &n); - } - - module_put(nlk->module); - - netlink_table_grab(); - if (netlink_is_kernel(sk)) { - BUG_ON(nl_table[sk->sk_protocol].registered == 0); - if (--nl_table[sk->sk_protocol].registered == 0) { - kfree(nl_table[sk->sk_protocol].listeners); - nl_table[sk->sk_protocol].module = NULL; - nl_table[sk->sk_protocol].registered = 0; - } - } else if (nlk->subscriptions) - netlink_update_listeners(sk); - netlink_table_ungrab(); - - kfree(nlk->groups); - nlk->groups = NULL; - - local_bh_disable(); - sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - local_bh_enable(); - sock_put(sk); - return 0; -} - -static int netlink_autobind(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; - struct hlist_head *head; - struct sock *osk; - struct hlist_node *node; - s32 pid = current->tgid; - int err; - static s32 rover = -4097; - -retry: - cond_resched(); - netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); - sk_for_each(osk, node, head) { - if (!net_eq(sock_net(osk), net)) - continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; - if (rover > -4097) - rover = -4097; - netlink_table_ungrab(); - goto retry; - } - } - netlink_table_ungrab(); - - err = netlink_insert(sk, net, pid); - if (err == -EADDRINUSE) - goto retry; - - /* If 2 threads race to autobind, that is fine. 
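 - * netlink_insert() fails with -EBUSY only because the other thread - * already bound this socket to a pid, so the loser can simply keep - * that pid and report success (editorial note).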
*/ - if (err == -EBUSY) - err = 0; - - return err; -} - -static inline int netlink_capable(struct socket *sock, unsigned int flag) -{ - return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || - capable(CAP_NET_ADMIN); -} - -static void -netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->subscriptions && !subscriptions) - __sk_del_bind_node(sk); - else if (!nlk->subscriptions && subscriptions) - sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); - nlk->subscriptions = subscriptions; -} - -static int netlink_realloc_groups(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int groups; - unsigned long *new_groups; - int err = 0; - - netlink_table_grab(); - - groups = nl_table[sk->sk_protocol].groups; - if (!nl_table[sk->sk_protocol].registered) { - err = -ENOENT; - goto out_unlock; - } - - if (nlk->ngroups >= groups) - goto out_unlock; - - new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); - if (new_groups == NULL) { - err = -ENOMEM; - goto out_unlock; - } - memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, - NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); - - nlk->groups = new_groups; - nlk->ngroups = groups; - out_unlock: - netlink_table_ungrab(); - return err; -} - -static int netlink_bind(struct socket *sock, struct sockaddr *addr, - int addr_len) -{ - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; - int err; - - if (nladdr->nl_family != AF_NETLINK) - return -EINVAL; - - /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_NONROOT_RECV)) - return -EPERM; - err = netlink_realloc_groups(sk); - if (err) - return err; - } - - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) - return -EINVAL; - } else { - err = nladdr->nl_pid ? 
- netlink_insert(sk, net, nladdr->nl_pid) : - netlink_autobind(sock); - if (err) - return err; - } - - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) - return 0; - - netlink_table_grab(); - netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; - netlink_update_listeners(sk); - netlink_table_ungrab(); - - return 0; -} - -static int netlink_connect(struct socket *sock, struct sockaddr *addr, - int alen, int flags) -{ - int err = 0; - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; - - if (addr->sa_family == AF_UNSPEC) { - sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; - nlk->dst_group = 0; - return 0; - } - if (addr->sa_family != AF_NETLINK) - return -EINVAL; - - /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) - return -EPERM; - - if (!nlk->pid) - err = netlink_autobind(sock); - - if (err == 0) { - sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; - nlk->dst_group = ffs(nladdr->nl_groups); - } - - return err; -} - -static int netlink_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; - - nladdr->nl_family = AF_NETLINK; - nladdr->nl_pad = 0; - *addr_len = sizeof(*nladdr); - - if (peer) { - nladdr->nl_pid = nlk->dst_pid; - nladdr->nl_groups = netlink_group_mask(nlk->dst_group); - } else { - nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; - } - return 0; -} - -static void netlink_overrun(struct sock *sk) -{ - if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - } -} - -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) -{ - struct sock *sock; - struct netlink_sock *nlk; - - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); - if (!sock) - return ERR_PTR(-ECONNREFUSED); - - /* Don't bother queuing skb if kernel socket has no input function */ - nlk = nlk_sk(sock); - if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { - sock_put(sock); - return ERR_PTR(-ECONNREFUSED); - } - return sock; -} - -struct sock *netlink_getsockbyfilp(struct file *filp) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct sock *sock; - - if (!S_ISSOCK(inode->i_mode)) - return ERR_PTR(-ENOTSOCK); - - sock = SOCKET_I(inode)->sk; - if (sock->sk_family != AF_NETLINK) - return ERR_PTR(-EINVAL); - - sock_hold(sock); - return sock; -} - -/* - * Attach a skb to a netlink socket. - * The caller must hold a reference to the destination socket. On error, the - * reference is dropped. The skb is not sent to the destination; all - * error checks are performed and memory in the queue is reserved. - * Return values: - * < 0: error. skb freed, reference to sock dropped. - * 0: continue - * 1: repeat lookup - reference dropped while waiting for socket memory.
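 - * - * Caller pattern (an editorial sketch; it mirrors netlink_unicast() - * further below): - * - *	retry: - *		sk = netlink_getsockbypid(ssk, pid); - *		err = netlink_attachskb(sk, skb, &timeo, ssk); - *		if (err == 1) - *			goto retry;	(the socket reference was dropped) - *		if (err) - *			return err;	(skb already freed) - *		return netlink_sendskb(sk, skb);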
- */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, - long *timeo, struct sock *ssk) -{ - struct netlink_sock *nlk; - - nlk = nlk_sk(sk); - - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); - - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); - sock_put(sk); - - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; - } - skb_set_owner_r(skb, sk); - return 0; -} - -int netlink_sendskb(struct sock *sk, struct sk_buff *skb) -{ - int len = skb->len; - - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, len); - sock_put(sk); - return len; -} - -void netlink_detachskb(struct sock *sk, struct sk_buff *skb) -{ - kfree_skb(skb); - sock_put(sk); -} - -static inline struct sk_buff *netlink_trim(struct sk_buff *skb, - gfp_t allocation) -{ - int delta; - - skb_orphan(skb); - - delta = skb->end - skb->tail; - if (delta * 2 < skb->truesize) - return skb; - - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, allocation); - if (!nskb) - return skb; - kfree_skb(skb); - skb = nskb; - } - - if (!pskb_expand_head(skb, 0, -delta, allocation)) - skb->truesize -= delta; - - return skb; -} - -static inline void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - -static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) -{ - int ret; - struct netlink_sock *nlk = nlk_sk(sk); - - ret = -ECONNREFUSED; - if (nlk->netlink_rcv != NULL) { - ret = skb->len; - skb_set_owner_r(skb, sk); - nlk->netlink_rcv(skb); - } - kfree_skb(skb); - sock_put(sk); - return ret; -} - -int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) -{ - struct sock *sk; - int err; - long timeo; - - skb = netlink_trim(skb, gfp_any()); - - timeo = sock_sndtimeo(ssk, nonblock); -retry: - sk = netlink_getsockbypid(ssk, pid); - if (IS_ERR(sk)) { - kfree_skb(skb); - return PTR_ERR(sk); - } - if (netlink_is_kernel(sk)) - return netlink_unicast_kernel(sk, skb); - - if (sk_filter(sk, skb)) { - err = skb->len; - kfree_skb(skb); - sock_put(sk); - return err; - } - - err = netlink_attachskb(sk, skb, &timeo, ssk); - if (err == 1) - goto retry; - if (err) - return err; - - return netlink_sendskb(sk, skb); -} -EXPORT_SYMBOL(netlink_unicast); - -int netlink_has_listeners(struct sock *sk, unsigned int group) -{ - int res = 0; - unsigned long *listeners; - - BUG_ON(!netlink_is_kernel(sk)); - - rcu_read_lock(); - listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); - - if (group - 1 < nl_table[sk->sk_protocol].groups) - res = test_bit(group - 1, listeners); - - rcu_read_unlock(); - - return res; -} -EXPORT_SYMBOL_GPL(netlink_has_listeners); - -static inline int netlink_broadcast_deliver(struct sock *sk, - struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !test_bit(0, &nlk->state)) { - skb_set_owner_r(skb, 
sk); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; - } - return -1; -} - -struct netlink_broadcast_data { - struct sock *exclude_sk; - struct net *net; - u32 pid; - u32 group; - int failure; - int congested; - int delivered; - gfp_t allocation; - struct sk_buff *skb, *skb2; -}; - -static inline int do_one_broadcast(struct sock *sk, - struct netlink_broadcast_data *p) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int val; - - if (p->exclude_sk == sk) - goto out; - - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || - !test_bit(p->group - 1, nlk->groups)) - goto out; - - if (!net_eq(sock_net(sk), p->net)) - goto out; - - if (p->failure) { - netlink_overrun(sk); - goto out; - } - - sock_hold(sk); - if (p->skb2 == NULL) { - if (skb_shared(p->skb)) { - p->skb2 = skb_clone(p->skb, p->allocation); - } else { - p->skb2 = skb_get(p->skb); - /* - * skb ownership may have been set when - * delivered to a previous socket. - */ - skb_orphan(p->skb2); - } - } - if (p->skb2 == NULL) { - netlink_overrun(sk); - /* Clone failed. Notify ALL listeners. */ - p->failure = 1; - } else if (sk_filter(sk, p->skb2)) { - kfree_skb(p->skb2); - p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { - netlink_overrun(sk); - } else { - p->congested |= val; - p->delivered = 1; - p->skb2 = NULL; - } - sock_put(sk); - -out: - return 0; -} - -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, gfp_t allocation) -{ - struct net *net = sock_net(ssk); - struct netlink_broadcast_data info; - struct hlist_node *node; - struct sock *sk; - - skb = netlink_trim(skb, allocation); - - info.exclude_sk = ssk; - info.net = net; - info.pid = pid; - info.group = group; - info.failure = 0; - info.congested = 0; - info.delivered = 0; - info.allocation = allocation; - info.skb = skb; - info.skb2 = NULL; - - /* While we sleep in clone, do not allow to change socket list */ - - netlink_lock_table(); - - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_broadcast(sk, &info); - - kfree_skb(skb); - - netlink_unlock_table(); - - if (info.skb2) - kfree_skb(info.skb2); - - if (info.delivered) { - if (info.congested && (allocation & __GFP_WAIT)) - yield(); - return 0; - } - if (info.failure) - return -ENOBUFS; - return -ESRCH; -} -EXPORT_SYMBOL(netlink_broadcast); - -struct netlink_set_err_data { - struct sock *exclude_sk; - u32 pid; - u32 group; - int code; -}; - -static inline int do_one_set_err(struct sock *sk, - struct netlink_set_err_data *p) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (sk == p->exclude_sk) - goto out; - - if (sock_net(sk) != sock_net(p->exclude_sk)) - goto out; - - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || - !test_bit(p->group - 1, nlk->groups)) - goto out; - - sk->sk_err = p->code; - sk->sk_error_report(sk); -out: - return 0; -} - -/** - * netlink_set_err - report error to broadcast listeners - * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) - * @groups: the broadcast group that will notice the error - * @code: error code, must be negative (as usual in kernelspace) - */ -void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) -{ - struct netlink_set_err_data info; - struct hlist_node *node; - struct sock *sk; - - info.exclude_sk = ssk; - info.pid = pid; - info.group = group; - /* sk->sk_err wants a positive error value */ - 
info.code = -code; - - read_lock(&nl_table_lock); - - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_set_err(sk, &info); - - read_unlock(&nl_table_lock); -} - -/* must be called with netlink table grabbed */ -static void netlink_update_socket_mc(struct netlink_sock *nlk, - unsigned int group, - int is_new) -{ - int old, new = !!is_new, subscriptions; - - old = test_bit(group - 1, nlk->groups); - subscriptions = nlk->subscriptions - old + new; - if (new) - __set_bit(group - 1, nlk->groups); - else - __clear_bit(group - 1, nlk->groups); - netlink_update_subscriptions(&nlk->sk, subscriptions); - netlink_update_listeners(&nlk->sk); -} - -static int netlink_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int val = 0; - int err; - - if (level != SOL_NETLINK) - return -ENOPROTOOPT; - - if (optlen >= sizeof(int) && - get_user(val, (unsigned int __user *)optval)) - return -EFAULT; - - switch (optname) { - case NETLINK_PKTINFO: - if (val) - nlk->flags |= NETLINK_RECV_PKTINFO; - else - nlk->flags &= ~NETLINK_RECV_PKTINFO; - err = 0; - break; - case NETLINK_ADD_MEMBERSHIP: - case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_NONROOT_RECV)) - return -EPERM; - err = netlink_realloc_groups(sk); - if (err) - return err; - if (!val || val - 1 >= nlk->ngroups) - return -EINVAL; - netlink_table_grab(); - netlink_update_socket_mc(nlk, val, - optname == NETLINK_ADD_MEMBERSHIP); - netlink_table_ungrab(); - err = 0; - break; - } - default: - err = -ENOPROTOOPT; - } - return err; -} - -static int netlink_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - int len, val, err; - - if (level != SOL_NETLINK) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < 0) - return -EINVAL; - - switch (optname) { - case NETLINK_PKTINFO: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_RECV_PKTINFO ? 
1 : 0; - if (put_user(len, optlen) || - put_user(val, optval)) - return -EFAULT; - err = 0; - break; - default: - err = -ENOPROTOOPT; - } - return err; -} - -static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) -{ - struct nl_pktinfo info; - - info.group = NETLINK_CB(skb).dst_group; - put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); -} - 
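From userspace, the two socket options handled above map onto plain setsockopt() calls at level SOL_NETLINK. A sketch, assuming a netlink protocol number proto and a group number group, with error handling trimmed:

    #include <sys/socket.h>
    #include <linux/netlink.h>   /* NETLINK_ADD_MEMBERSHIP, NETLINK_PKTINFO */

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270      /* may be missing from older libc headers */
    #endif

    static int open_and_join(int proto, unsigned int group)
    {
            int one = 1;
            int fd = socket(AF_NETLINK, SOCK_DGRAM, proto);

            if (fd < 0)
                    return -1;
            /* lands in the NETLINK_ADD_MEMBERSHIP case of netlink_setsockopt() */
            if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                           &group, sizeof(group)) < 0)
                    return -1;
            /* ask for the cmsg emitted by netlink_cmsg_recv_pktinfo() */
            setsockopt(fd, SOL_NETLINK, NETLINK_PKTINFO, &one, sizeof(one));
            return fd;
    }

Joining a group succeeds only if the family allows non-root receivers (see netlink_set_nonroot() below) or the caller has CAP_NET_ADMIN.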
-static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; - u32 dst_group; - struct sk_buff *skb; - int err; - struct scm_cookie scm; - - if (msg->msg_flags&MSG_OOB) - return -EOPNOTSUPP; - - if (NULL == siocb->scm) - siocb->scm = &scm; - err = scm_send(sock, msg, siocb->scm); - if (err < 0) - return err; - - if (msg->msg_namelen) { - if (addr->nl_family != AF_NETLINK) - return -EINVAL; - dst_pid = addr->nl_pid; - dst_group = ffs(addr->nl_groups); - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) - return -EPERM; - } else { - dst_pid = nlk->dst_pid; - dst_group = nlk->dst_group; - } - - if (!nlk->pid) { - err = netlink_autobind(sock); - if (err) - goto out; - } - - err = -EMSGSIZE; - if (len > sk->sk_sndbuf - 32) - goto out; - err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); - if (skb == NULL) - goto out; - - NETLINK_CB(skb).pid = nlk->pid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).loginuid = audit_get_loginuid(current); - NETLINK_CB(skb).sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &(NETLINK_CB(skb).sid)); - memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - - /* What can I do? Netlink is asynchronous, so we - have to save the current capabilities and check - them when this message is delivered to the - corresponding kernel module. - --ANK (980802) - */ - - err = -EFAULT; - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - kfree_skb(skb); - goto out; - } - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (dst_group) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); - -out: - return err; -} - -static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) -{ - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); - struct scm_cookie scm; - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - int noblock = flags&MSG_DONTWAIT; - size_t copied; - struct sk_buff *skb; - int err; - - if (flags&MSG_OOB) - return -EOPNOTSUPP; - - copied = 0; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (skb == NULL) - goto out; - - msg->msg_namelen = 0; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - skb_reset_transport_header(skb); - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - - if (msg->msg_name) { - struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; - addr->nl_family = AF_NETLINK; - addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; - addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); - msg->msg_namelen = sizeof(*addr); - } - - if (nlk->flags & NETLINK_RECV_PKTINFO) - netlink_cmsg_recv_pktinfo(msg, skb); - - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); - if (flags & MSG_TRUNC) - copied = skb->len; - skb_free_datagram(sk, skb); - - if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) - netlink_dump(sk); - - scm_recv(sock, msg, siocb->scm, flags); -out: - netlink_rcv_wake(sk); - return err ? : copied; -} - -static void netlink_data_ready(struct sock *sk, int len) -{ - BUG(); -} - -/* - * We export these functions to other modules. They provide a - * complete set of kernel non-blocking support for message - * queueing. - */ - -struct sock * -netlink_kernel_create(struct net *net, int unit, unsigned int groups, - void (*input)(struct sk_buff *skb), - struct mutex *cb_mutex, struct module *module) -{ - struct socket *sock; - struct sock *sk; - struct netlink_sock *nlk; - unsigned long *listeners = NULL; - - BUG_ON(!nl_table); - - if (unit < 0 || unit >= MAX_LINKS) - return NULL; - - if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) - return NULL; - - /* - * We just have to have a reference on the net from sk, but don't - * get_net it. Besides, we cannot get and then put the net here. - * So we create one inside init_net and then move it to net. 
- */ - - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) - goto out_sock_release_nosk; - - sk = sock->sk; - sk_change_net(sk, net); - - if (groups < 32) - groups = 32; - - listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL); - if (!listeners) - goto out_sock_release; - - sk->sk_data_ready = netlink_data_ready; - if (input) - nlk_sk(sk)->netlink_rcv = input; - - if (netlink_insert(sk, net, 0)) - goto out_sock_release; - - nlk = nlk_sk(sk); - nlk->flags |= NETLINK_KERNEL_SOCKET; - - netlink_table_grab(); - if (!nl_table[unit].registered) { - nl_table[unit].groups = groups; - nl_table[unit].listeners = listeners; - nl_table[unit].cb_mutex = cb_mutex; - nl_table[unit].module = module; - nl_table[unit].registered = 1; - } else { - kfree(listeners); - nl_table[unit].registered++; - } - netlink_table_ungrab(); - return sk; - -out_sock_release: - kfree(listeners); - netlink_kernel_release(sk); - return NULL; - -out_sock_release_nosk: - sock_release(sock); - return NULL; -} -EXPORT_SYMBOL(netlink_kernel_create); - - -void -netlink_kernel_release(struct sock *sk) -{ - sk_release_kernel(sk); -} -EXPORT_SYMBOL(netlink_kernel_release); - - -/** - * netlink_change_ngroups - change number of multicast groups - * - * This changes the number of multicast groups that are available - * on a certain netlink family. Note that it is not possible to - * change the number of groups to below 32. Also note that it does - * not implicitly call netlink_clear_multicast_users() when the - * number of groups is reduced. - * - * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). - * @groups: The new number of groups. - */ -int netlink_change_ngroups(struct sock *sk, unsigned int groups) -{ - unsigned long *listeners, *old = NULL; - struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - int err = 0; - - if (groups < 32) - groups = 32; - - netlink_table_grab(); - if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { - listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC); - if (!listeners) { - err = -ENOMEM; - goto out_ungrab; - } - old = tbl->listeners; - memcpy(listeners, old, NLGRPSZ(tbl->groups)); - rcu_assign_pointer(tbl->listeners, listeners); - } - tbl->groups = groups; - - out_ungrab: - netlink_table_ungrab(); - synchronize_rcu(); - kfree(old); - return err; -} -EXPORT_SYMBOL(netlink_change_ngroups); - -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - * netlink_kernel_create(). - * @group: The multicast group to clear. - */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ - struct sock *sk; - struct hlist_node *node; - struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - - netlink_table_grab(); - - sk_for_each_bound(sk, node, &tbl->mc_list) - netlink_update_socket_mc(nlk_sk(sk), group, 0); - - netlink_table_ungrab(); -} -EXPORT_SYMBOL(netlink_clear_multicast_users); - -void netlink_set_nonroot(int protocol, unsigned int flags) -{ - if ((unsigned int)protocol < MAX_LINKS) - nl_table[protocol].nl_nonroot = flags; -} -EXPORT_SYMBOL(netlink_set_nonroot); -
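netlink_kernel_create(), netlink_kernel_release() and the group helpers above make up the whole kernel-facing API of this file. A sketch of how a subsystem might wire them up, assuming a protocol number MY_NETLINK and a message handler my_handle_msg() (both hypothetical, not defined here):

    static struct sock *my_nl_sk;

    /* invoked via nlk->netlink_rcv() from netlink_unicast_kernel() */
    static void my_input(struct sk_buff *skb)
    {
            netlink_rcv_skb(skb, &my_handle_msg);
    }

    static int __init my_init(void)
    {
            my_nl_sk = netlink_kernel_create(&init_net, MY_NETLINK, 0,
                                             my_input, NULL, THIS_MODULE);
            return my_nl_sk ? 0 : -ENOMEM;
    }

    static void __exit my_exit(void)
    {
            netlink_kernel_release(my_nl_sk);
    }

Passing NULL for cb_mutex lets the netlink core fall back to its default callback mutex.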
-static void netlink_destroy_callback(struct netlink_callback *cb) -{ - if (cb->skb) - kfree_skb(cb->skb); - kfree(cb); -} - -/* - * It looks a bit ugly. - * It would be better to create a kernel thread. - */ - -static int netlink_dump(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_callback *cb; - struct sk_buff *skb; - struct nlmsghdr *nlh; - int len, err = -ENOBUFS; - - skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); - if (!skb) - goto errout; - - mutex_lock(nlk->cb_mutex); - - cb = nlk->cb; - if (cb == NULL) { - err = -EINVAL; - goto errout_skb; - } - - len = cb->dump(skb, cb); - - if (len > 0) { - mutex_unlock(nlk->cb_mutex); - - if (sk_filter(sk, skb)) - kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } - return 0; - } - - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - if (!nlh) - goto errout_skb; - - memcpy(nlmsg_data(nlh), &len, sizeof(len)); - - if (sk_filter(sk, skb)) - kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } - - if (cb->done) - cb->done(cb); - nlk->cb = NULL; - mutex_unlock(nlk->cb_mutex); - - netlink_destroy_callback(cb); - return 0; - -errout_skb: - mutex_unlock(nlk->cb_mutex); - kfree_skb(skb); -errout: - return err; -} - -int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, - struct netlink_callback *), - int (*done)(struct netlink_callback *)) -{ -#ifdef DDE_LINUX - return -ENOBUFS; -#else - struct netlink_callback *cb; - struct sock *sk; - struct netlink_sock *nlk; - - cb = kzalloc(sizeof(*cb), GFP_KERNEL); - if (cb == NULL) - return -ENOBUFS; - - cb->dump = dump; - cb->done = done; - cb->nlh = nlh; - atomic_inc(&skb->users); - cb->skb = skb; - - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); - if (sk == NULL) { - netlink_destroy_callback(cb); - return -ECONNREFUSED; - } - nlk = nlk_sk(sk); - /* A dump is in progress... */ - mutex_lock(nlk->cb_mutex); - if (nlk->cb) { - mutex_unlock(nlk->cb_mutex); - netlink_destroy_callback(cb); - sock_put(sk); - return -EBUSY; - } - nlk->cb = cb; - mutex_unlock(nlk->cb_mutex); - - netlink_dump(sk); - sock_put(sk); - - /* We successfully started a dump; by returning -EINTR we - * signal not to send an ACK even if it was requested. - */ - return -EINTR; -#endif -} -EXPORT_SYMBOL(netlink_dump_start); - -void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) -{ - struct sk_buff *skb; - struct nlmsghdr *rep; - struct nlmsgerr *errmsg; - size_t payload = sizeof(*errmsg); - - /* error messages get the original request appended */ - if (err) - payload += nlmsg_len(nlh); - - skb = nlmsg_new(payload, GFP_KERNEL); - if (!skb) { - struct sock *sk; - - sk = netlink_lookup(sock_net(in_skb->sk), - in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); - if (sk) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - sock_put(sk); - } - return; - } - - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, - NLMSG_ERROR, sizeof(struct nlmsgerr), 0); - errmsg = nlmsg_data(rep); - errmsg->error = err; - memcpy(&errmsg->msg, nlh, err ? 
nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); -} -EXPORT_SYMBOL(netlink_ack); - -int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, - struct nlmsghdr *)) -{ - struct nlmsghdr *nlh; - int err; - - while (skb->len >= nlmsg_total_size(0)) { - int msglen; - - nlh = nlmsg_hdr(skb); - err = 0; - - if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) - return 0; - - /* Only requests are handled by the kernel */ - if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) - goto ack; - - /* Skip control messages */ - if (nlh->nlmsg_type < NLMSG_MIN_TYPE) - goto ack; - - err = cb(skb, nlh); - if (err == -EINTR) - goto skip; - -ack: - if (nlh->nlmsg_flags & NLM_F_ACK || err) - netlink_ack(skb, nlh, err); - -skip: - msglen = NLMSG_ALIGN(nlh->nlmsg_len); - if (msglen > skb->len) - msglen = skb->len; - skb_pull(skb, msglen); - } - - return 0; -} -EXPORT_SYMBOL(netlink_rcv_skb); - -/** - * nlmsg_notify - send a notification netlink message - * @sk: netlink socket to use - * @skb: notification message - * @pid: destination netlink pid for reports or 0 - * @group: destination multicast group or 0 - * @report: 1 to report back, 0 to disable - * @flags: allocation flags - */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, - unsigned int group, int report, gfp_t flags) -{ - int err = 0; - - if (group) { - int exclude_pid = 0; - - if (report) { - atomic_inc(&skb->users); - exclude_pid = pid; - } - - /* errors reported via destination sk->sk_err */ - nlmsg_multicast(sk, skb, exclude_pid, group, flags); - } - - if (report) - err = nlmsg_unicast(sk, skb, pid); - - return err; -} -EXPORT_SYMBOL(nlmsg_notify); - -#ifdef CONFIG_PROC_FS -struct nl_seq_iter { - struct seq_net_private p; - int link; - int hash_idx; -}; - -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) -{ - struct nl_seq_iter *iter = seq->private; - int i, j; - struct sock *s; - struct hlist_node *node; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; - - for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, node, &hash->table[j]) { - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } - } - return NULL; -} - -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) -{ - read_lock(&nl_table_lock); - return *pos ? 
netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct sock *s; - struct nl_seq_iter *iter; - int i, j; - - ++*pos; - - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); - - iter = seq->private; - s = v; - do { - s = sk_next(s); - } while (s && sock_net(s) != seq_file_net(seq)); - if (s) - return s; - - i = iter->link; - j = iter->hash_idx + 1; - - do { - struct nl_pid_hash *hash = &nl_table[i].hash; - - for (; j <= hash->mask; j++) { - s = sk_head(&hash->table[j]); - while (s && sock_net(s) != seq_file_net(seq)) - s = sk_next(s); - if (s) { - iter->link = i; - iter->hash_idx = j; - return s; - } - } - - j = 0; - } while (++i < MAX_LINKS); - - return NULL; -} - -static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(nl_table_lock) -{ - read_unlock(&nl_table_lock); -} - - -static int netlink_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, - "sk Eth Pid Groups " - "Rmem Wmem Dump Locks\n"); - else { - struct sock *s = v; - struct netlink_sock *nlk = nlk_sk(s); - - seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", - s, - s->sk_protocol, - nlk->pid, - nlk->groups ? (u32)nlk->groups[0] : 0, - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), - nlk->cb, - atomic_read(&s->sk_refcnt) - ); - - } - return 0; -} - -static const struct seq_operations netlink_seq_ops = { - .start = netlink_seq_start, - .next = netlink_seq_next, - .stop = netlink_seq_stop, - .show = netlink_seq_show, -}; - - -static int netlink_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &netlink_seq_ops, - sizeof(struct nl_seq_iter)); -} - -static const struct file_operations netlink_seq_fops = { - .owner = THIS_MODULE, - .open = netlink_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -int netlink_register_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&netlink_chain, nb); -} -EXPORT_SYMBOL(netlink_register_notifier); - -int netlink_unregister_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&netlink_chain, nb); -} -EXPORT_SYMBOL(netlink_unregister_notifier); - -static const struct proto_ops netlink_ops = { - .family = PF_NETLINK, - .owner = THIS_MODULE, - .release = netlink_release, - .bind = netlink_bind, - .connect = netlink_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = netlink_getname, - .poll = datagram_poll, - .ioctl = sock_no_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = netlink_setsockopt, - .getsockopt = netlink_getsockopt, - .sendmsg = netlink_sendmsg, - .recvmsg = netlink_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -static struct net_proto_family netlink_family_ops = { - .family = PF_NETLINK, - .create = netlink_create, - .owner = THIS_MODULE, /* for consistency 8) */ -}; - -static int __net_init netlink_net_init(struct net *net) -{ -#ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) - return -ENOMEM; -#endif - return 0; -} - -static void __net_exit netlink_net_exit(struct net *net) -{ -#ifdef CONFIG_PROC_FS - proc_net_remove(net, "netlink"); -#endif -} - -static struct pernet_operations __net_initdata netlink_net_ops = { - .init = netlink_net_init, - .exit = netlink_net_exit, -}; - -static int __init netlink_proto_init(void) -{ - struct sk_buff 
*dummy_skb; - int i; - unsigned long limit; - unsigned int order; - int err = proto_register(&netlink_proto, 0); - - if (err != 0) - goto out; - - BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); - - nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); - if (!nl_table) - goto panic; - - if (num_physpages >= (128 * 1024)) - limit = num_physpages >> (21 - PAGE_SHIFT); - else - limit = num_physpages >> (23 - PAGE_SHIFT); - - order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; - limit = (1UL << order) / sizeof(struct hlist_head); - order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; - - for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; - - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); - if (!hash->table) { - while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, - 1 * sizeof(*hash->table)); - kfree(nl_table); - goto panic; - } - hash->max_shift = order; - hash->shift = 0; - hash->mask = 0; - hash->rehash_time = jiffies; - } - - sock_register(&netlink_family_ops); - register_pernet_subsys(&netlink_net_ops); - /* The netlink device handler may be needed early. */ - rtnetlink_init(); -out: - return err; -panic: - panic("netlink_init: Cannot allocate nl_table\n"); -} - -core_initcall(netlink_proto_init); diff --git a/libdde_linux26/lib/src/net/sched/sch_generic.c b/libdde_linux26/lib/src/net/sched/sch_generic.c deleted file mode 100644 index a2acd6c4..00000000 --- a/libdde_linux26/lib/src/net/sched/sch_generic.c +++ /dev/null @@ -1,749 +0,0 @@ -/* - * net/sched/sch_generic.c Generic packet scheduler routines. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * Jamal Hadi Salim, <hadi@cyberus.ca> 990601 - * - Ingress support - */ - -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <linux/rcupdate.h> -#include <linux/list.h> -#include <net/pkt_sched.h> - -#ifdef DDE_LINUX -#include "local.h" -#endif - -/* Main transmission queue. */ - -/* Modifications to data participating in scheduling must be protected with - * qdisc_lock(qdisc) spinlock. - * - * The idea is the following: - * - enqueue, dequeue are serialized via qdisc root lock - * - ingress filtering is also serialized via qdisc root lock - * - updates to tree and tree walking are only done under the rtnl mutex. 
- */ - -static inline int qdisc_qlen(struct Qdisc *q) -{ - return q->q.qlen; -} - -static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) -{ - q->gso_skb = skb; - q->qstats.requeues++; - __netif_schedule(q); - - return 0; -} - -static inline struct sk_buff *dequeue_skb(struct Qdisc *q) -{ - struct sk_buff *skb = q->gso_skb; - - if (unlikely(skb)) { - struct net_device *dev = qdisc_dev(q); - struct netdev_queue *txq; - - /* check the reason for requeuing without tx lock first */ - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) - q->gso_skb = NULL; - else - skb = NULL; - } else { - skb = q->dequeue(q); - } - - return skb; -} - -static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) -{ - int ret; - - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { - /* - * Same CPU holding the lock. It may be a transient - * configuration error, when hard_start_xmit() recurses. We - * detect it by checking xmit owner and drop the packet when - * deadloop is detected. Return OK to try the next skb. - */ - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev_queue->dev->name); - ret = qdisc_qlen(q); - } else { - /* - * Another cpu is holding lock, requeue & delay xmits for - * some time. - */ - __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, q); - } - - return ret; -} - -/* - * NOTE: Called under qdisc_lock(q) with locally disabled BH. - * - * __QDISC_STATE_RUNNING guarantees only one CPU can process - * this qdisc at a time. qdisc_lock(q) serializes queue accesses for - * this queue. - * - * netif_tx_lock serializes accesses to device driver. - * - * qdisc_lock(q) and netif_tx_lock are mutually exclusive, - * if one is grabbed, another must be free. - * - * Note that this procedure can be called by a watchdog timer - * - * Returns to the caller: - * 0 - queue is empty or throttled. - * >0 - queue is not empty. - * - */ -static inline int qdisc_restart(struct Qdisc *q) -{ - struct netdev_queue *txq; - int ret = NETDEV_TX_BUSY; - struct net_device *dev; - spinlock_t *root_lock; - struct sk_buff *skb; - - /* Dequeue packet */ - if (unlikely((skb = dequeue_skb(q)) == NULL)) - return 0; - - root_lock = qdisc_lock(q); - - /* And release qdisc */ - spin_unlock(root_lock); - - dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - - HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); - HARD_TX_UNLOCK(dev, txq); - - spin_lock(root_lock); - - switch (ret) { - case NETDEV_TX_OK: - /* Driver sent out skb successfully */ - ret = qdisc_qlen(q); - break; - - case NETDEV_TX_LOCKED: - /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, txq, q); - break; - - default: - /* Driver returned NETDEV_TX_BUSY - requeue skb */ - if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) - printk(KERN_WARNING "BUG %s code %d qlen %d\n", - dev->name, ret, q->q.qlen); - - ret = dev_requeue_skb(skb, q); - break; - } - - if (ret && (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq))) - ret = 0; - - return ret; -} -
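The switch above is the entire contract between the scheduler and a driver's hard_start_xmit routine: NETDEV_TX_OK hands the skb off, NETDEV_TX_LOCKED reports a lost try-lock race, and NETDEV_TX_BUSY asks for a requeue. A hypothetical transmit routine showing the three cases (my_priv and its helpers are made up for illustration):

    static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            struct my_priv *priv = netdev_priv(dev);

            if (!spin_trylock(&priv->hw_lock))
                    return NETDEV_TX_LOCKED; /* another CPU owns the hardware */

            if (my_tx_ring_full(priv)) {
                    netif_stop_queue(dev);   /* qdisc_restart() will requeue */
                    spin_unlock(&priv->hw_lock);
                    return NETDEV_TX_BUSY;
            }

            my_queue_to_hw(priv, skb);       /* driver now owns the skb */
            spin_unlock(&priv->hw_lock);
            return NETDEV_TX_OK;
    }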
-void __qdisc_run(struct Qdisc *q) -{ - unsigned long start_time = jiffies; - - while (qdisc_restart(q)) { - /* - * Postpone processing if - * 1. another process needs the CPU; - * 2. we've been doing it for too long. - */ - if (need_resched() || jiffies != start_time) { - __netif_schedule(q); - break; - } - } - - clear_bit(__QDISC_STATE_RUNNING, &q->state); -} - -static void dev_watchdog(unsigned long arg) -{ - struct net_device *dev = (struct net_device *)arg; - - netif_tx_lock(dev); - if (!qdisc_tx_is_noop(dev)) { - if (netif_device_present(dev) && - netif_running(dev) && - netif_carrier_ok(dev)) { - int some_queue_stopped = 0; - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq; - - txq = netdev_get_tx_queue(dev, i); - if (netif_tx_queue_stopped(txq)) { - some_queue_stopped = 1; - break; - } - } - - if (some_queue_stopped && - time_after(jiffies, (dev->trans_start + - dev->watchdog_timeo))) { - char drivername[64]; - WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", - dev->name, netdev_drivername(dev, drivername, 64)); - dev->netdev_ops->ndo_tx_timeout(dev); - } - if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + - dev->watchdog_timeo))) - dev_hold(dev); - } - } - netif_tx_unlock(dev); - - dev_put(dev); -} - -void __netdev_watchdog_up(struct net_device *dev) -{ - if (dev->netdev_ops->ndo_tx_timeout) { - if (dev->watchdog_timeo <= 0) - dev->watchdog_timeo = 5*HZ; - if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + dev->watchdog_timeo))) - dev_hold(dev); - } -} - -static void dev_watchdog_up(struct net_device *dev) -{ - __netdev_watchdog_up(dev); -} - -static void dev_watchdog_down(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - if (del_timer(&dev->watchdog_timer)) - dev_put(dev); - netif_tx_unlock_bh(dev); -} - -/** - * netif_carrier_on - set carrier - * @dev: network device - * - * Device has detected acquisition of carrier. - */ -void netif_carrier_on(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; - linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); - } -} -EXPORT_SYMBOL(netif_carrier_on); - -/** - * netif_carrier_off - clear carrier - * @dev: network device - * - * Device has detected loss of carrier. - */ -void netif_carrier_off(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; - linkwatch_fire_event(dev); - } -} -EXPORT_SYMBOL(netif_carrier_off); - -/* "NOOP" scheduler: the best scheduler, recommended for all interfaces - under all circumstances. It is difficult to invent anything faster or - cheaper. 
- */ - -static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) -{ - kfree_skb(skb); - return NET_XMIT_CN; -} - -static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) -{ - return NULL; -} - -struct Qdisc_ops noop_qdisc_ops __read_mostly = { - .id = "noop", - .priv_size = 0, - .enqueue = noop_enqueue, - .dequeue = noop_dequeue, - .peek = noop_dequeue, - .owner = THIS_MODULE, -}; - -static struct netdev_queue noop_netdev_queue = { - .qdisc = &noop_qdisc, - .qdisc_sleeping = &noop_qdisc, -}; - -struct Qdisc noop_qdisc = { - .enqueue = noop_enqueue, - .dequeue = noop_dequeue, - .flags = TCQ_F_BUILTIN, - .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), - .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), - .dev_queue = &noop_netdev_queue, -}; -EXPORT_SYMBOL(noop_qdisc); - -static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { - .id = "noqueue", - .priv_size = 0, - .enqueue = noop_enqueue, - .dequeue = noop_dequeue, - .peek = noop_dequeue, - .owner = THIS_MODULE, -}; - -static struct Qdisc noqueue_qdisc; -static struct netdev_queue noqueue_netdev_queue = { - .qdisc = &noqueue_qdisc, - .qdisc_sleeping = &noqueue_qdisc, -}; - -static struct Qdisc noqueue_qdisc = { - .enqueue = NULL, - .dequeue = noop_dequeue, - .flags = TCQ_F_BUILTIN, - .ops = &noqueue_qdisc_ops, - .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), - .dev_queue = &noqueue_netdev_queue, -}; - - -static const u8 prio2band[TC_PRIO_MAX+1] = - { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; - -/* 3-band FIFO queue: old style, but should be a bit faster than - generic prio+fifo combination. - */ - -#define PFIFO_FAST_BANDS 3 - -static inline struct sk_buff_head *prio2list(struct sk_buff *skb, - struct Qdisc *qdisc) -{ - struct sk_buff_head *list = qdisc_priv(qdisc); - return list + prio2band[skb->priority & TC_PRIO_MAX]; -} - -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - struct sk_buff_head *list = prio2list(skb, qdisc); - - if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { - qdisc->q.qlen++; - return __qdisc_enqueue_tail(skb, qdisc, list); - } - - return qdisc_drop(skb, qdisc); -} - -static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) { - qdisc->q.qlen--; - return __qdisc_dequeue_head(qdisc, list + prio); - } - } - - return NULL; -} - -static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) - return skb_peek(list + prio); - } - - return NULL; -} - -static void pfifo_fast_reset(struct Qdisc* qdisc) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, list + prio); - - qdisc->qstats.backlog = 0; - qdisc->q.qlen = 0; -} - -static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) -{ - struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - -#ifndef DDE_LINUX - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -#else - WARN_UNIMPL; -#endif - return skb->len; - -nla_put_failure: - return -1; -} - 
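Band selection above boils down to a single table lookup: skb->priority is masked with TC_PRIO_MAX (15) and indexed into prio2band, so for instance TC_PRIO_INTERACTIVE (6) maps to band 0 (served first), TC_PRIO_BESTEFFORT (0) to band 1, and TC_PRIO_BULK (2) to band 2. A standalone check of that mapping, with the table copied out for illustration:

    #include <assert.h>

    static const unsigned char prio2band_copy[16] =
        { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

    static unsigned int band_for_priority(unsigned int priority)
    {
        return prio2band_copy[priority & 15];   /* 15 == TC_PRIO_MAX */
    }

    int main(void)
    {
        assert(band_for_priority(6) == 0);      /* TC_PRIO_INTERACTIVE */
        assert(band_for_priority(0) == 1);      /* TC_PRIO_BESTEFFORT */
        assert(band_for_priority(2) == 2);      /* TC_PRIO_BULK */
        return 0;
    }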
-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - skb_queue_head_init(list + prio); - - return 0; -} - -static struct Qdisc_ops pfifo_fast_ops __read_mostly = { - .id = "pfifo_fast", - .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), - .enqueue = pfifo_fast_enqueue, - .dequeue = pfifo_fast_dequeue, - .peek = pfifo_fast_peek, - .init = pfifo_fast_init, - .reset = pfifo_fast_reset, - .dump = pfifo_fast_dump, - .owner = THIS_MODULE, -}; - -struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - struct Qdisc_ops *ops) -{ - void *p; - struct Qdisc *sch; - unsigned int size; - int err = -ENOBUFS; - - /* ensure that the Qdisc and the private data are 32-byte aligned */ - size = QDISC_ALIGN(sizeof(*sch)); - size += ops->priv_size + (QDISC_ALIGNTO - 1); - - p = kzalloc(size, GFP_KERNEL); - if (!p) - goto errout; - sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); - sch->padded = (char *) sch - (char *) p; - - INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->q); - sch->ops = ops; - sch->enqueue = ops->enqueue; - sch->dequeue = ops->dequeue; - sch->dev_queue = dev_queue; - dev_hold(qdisc_dev(sch)); - atomic_set(&sch->refcnt, 1); - - return sch; -errout: - return ERR_PTR(err); -} - -struct Qdisc * qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, - struct Qdisc_ops *ops, - unsigned int parentid) -{ - struct Qdisc *sch; - - sch = qdisc_alloc(dev_queue, ops); - if (IS_ERR(sch)) - goto errout; - sch->parent = parentid; - - if (!ops->init || ops->init(sch, NULL) == 0) - return sch; - - qdisc_destroy(sch); -errout: - return NULL; -} -EXPORT_SYMBOL(qdisc_create_dflt); - -/* Under qdisc_lock(qdisc) and BH! */ - -void qdisc_reset(struct Qdisc *qdisc) -{ - const struct Qdisc_ops *ops = qdisc->ops; - - if (ops->reset) - ops->reset(qdisc); - - kfree_skb(qdisc->gso_skb); - qdisc->gso_skb = NULL; -} -EXPORT_SYMBOL(qdisc_reset); - -void qdisc_destroy(struct Qdisc *qdisc) -{ - const struct Qdisc_ops *ops = qdisc->ops; - - if (qdisc->flags & TCQ_F_BUILTIN || - !atomic_dec_and_test(&qdisc->refcnt)) - return; - -#ifdef CONFIG_NET_SCHED -#ifndef DDE_LINUX - qdisc_list_del(qdisc); - - qdisc_put_stab(qdisc->stab); - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif -#endif - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - - module_put(ops->owner); - dev_put(qdisc_dev(qdisc)); - - kfree_skb(qdisc->gso_skb); - kfree((char *) qdisc - qdisc->padded); -} -EXPORT_SYMBOL(qdisc_destroy); - -static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - if (txq->qdisc_sleeping != &noop_qdisc) - return false; - } - return true; -} - -static void attach_one_default_qdisc(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_unused) -{ - struct Qdisc *qdisc; - - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, dev_queue, - &pfifo_fast_ops, TC_H_ROOT); - if (!qdisc) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - } else { - qdisc = &noqueue_qdisc; - } - dev_queue->qdisc_sleeping = qdisc; -} - -static void transition_one_qdisc(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_need_watchdog) -{ - struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; - int *need_watchdog_p = _need_watchdog; - - if (!(new_qdisc->flags & TCQ_F_BUILTIN)) - clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); - - rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
- if (need_watchdog_p && new_qdisc != &noqueue_qdisc) - *need_watchdog_p = 1; -} - -void dev_activate(struct net_device *dev) -{ - int need_watchdog; - - /* No queueing discipline is attached to the device; - create a default one, i.e. pfifo_fast for devices - which need queueing, and noqueue_qdisc for - virtual interfaces. - */ - - if (dev_all_qdisc_sleeping_noop(dev)) - netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - - if (!netif_carrier_ok(dev)) - /* Delay activation until next carrier-on event */ - return; - - need_watchdog = 0; - netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); - transition_one_qdisc(dev, &dev->rx_queue, NULL); - - if (need_watchdog) { - dev->trans_start = jiffies; - dev_watchdog_up(dev); - } -} - -static void dev_deactivate_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc_default) -{ - struct Qdisc *qdisc_default = _qdisc_default; - struct Qdisc *qdisc; - - qdisc = dev_queue->qdisc; - if (qdisc) { - spin_lock_bh(qdisc_lock(qdisc)); - - if (!(qdisc->flags & TCQ_F_BUILTIN)) - set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - } -} - -static bool some_qdisc_is_busy(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *dev_queue; - spinlock_t *root_lock; - struct Qdisc *q; - int val; - - dev_queue = netdev_get_tx_queue(dev, i); - q = dev_queue->qdisc_sleeping; - root_lock = qdisc_lock(q); - - spin_lock_bh(root_lock); - - val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || - test_bit(__QDISC_STATE_SCHED, &q->state)); - - spin_unlock_bh(root_lock); - - if (val) - return true; - } - return false; -} - -void dev_deactivate(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); - dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); - - dev_watchdog_down(dev); - -#ifndef DDE_LINUX - /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ - synchronize_rcu(); -#endif - - /* Wait for outstanding qdisc_run calls. */ - while (some_qdisc_is_busy(dev)) - yield(); -} - -static void dev_init_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc) -{ - struct Qdisc *qdisc = _qdisc; - - dev_queue->qdisc = qdisc; - dev_queue->qdisc_sleeping = qdisc; -} - -void dev_init_scheduler(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); - dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); - - setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); -} - -static void shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc_default) -{ - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; - struct Qdisc *qdisc_default = _qdisc_default; - - if (qdisc) { - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; - - qdisc_destroy(qdisc); - } -} - -void dev_shutdown(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); - shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); - WARN_ON(timer_pending(&dev->watchdog_timer)); -}
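To close the loop on the watchdog machinery: a driver only supplies ndo_tx_timeout and watchdog_timeo; dev_init_scheduler() above arms the timer and dev_watchdog() calls back when a stopped queue stalls past the timeout. A sketch of the driver side (my_tx_timeout and my_reset_hw are illustrative, not from this file; my_start_xmit refers to the earlier transmit sketch):

    /* called from dev_watchdog() with netif_tx_lock held */
    static void my_tx_timeout(struct net_device *dev)
    {
            my_reset_hw(netdev_priv(dev));
            netif_wake_queue(dev);
    }

    static const struct net_device_ops my_netdev_ops = {
            .ndo_start_xmit = my_start_xmit,
            .ndo_tx_timeout = my_tx_timeout,
    };

    /* at probe time:
     *         dev->netdev_ops = &my_netdev_ops;
     *         dev->watchdog_timeo = 5 * HZ;
     * (__netdev_watchdog_up() falls back to 5*HZ if this is left 0)
     */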