From 9fd51e9b0ad33a89a83fdbbb66bd20d85f7893fb Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 4 Feb 2000 03:21:18 +0000 Subject: Import of Linux 2.2.12 subset (ipv4 stack and related) --- pfinet/linux-src/net/ipv4/ip_fw.c | 1759 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1759 insertions(+) create mode 100644 pfinet/linux-src/net/ipv4/ip_fw.c (limited to 'pfinet/linux-src/net/ipv4/ip_fw.c') diff --git a/pfinet/linux-src/net/ipv4/ip_fw.c b/pfinet/linux-src/net/ipv4/ip_fw.c new file mode 100644 index 00000000..99a91d53 --- /dev/null +++ b/pfinet/linux-src/net/ipv4/ip_fw.c @@ -0,0 +1,1759 @@ +/* + * This code is heavily based on the code on the old ip_fw.c code; see below for + * copyrights and attributions of the old code. This code is basically GPL. + * + * 15-Aug-1997: Major changes to allow graphs for firewall rules. + * Paul Russell and + * Michael Neuling + * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). + * Added explicit RETURN from chains. + * Removed TOS mangling (done in ipchains 1.0.1). + * Fixed read & reset bug by reworking proc handling. + * Paul Russell + * 28-Sep-1997: Added packet marking for net sched code. + * Removed fw_via comparisons: all done on device name now, + * similar to changes in ip_fw.c in DaveM's CVS970924 tree. + * Paul Russell + * 2-Nov-1997: Moved types across to __u16, etc. + * Added inverse flags. + * Fixed fragment bug (in args to port_match). + * Changed mark to only one flag (MARKABS). + * 21-Nov-1997: Added ability to test ICMP code. + * 19-Jan-1998: Added wildcard interfaces. + * 6-Feb-1998: Merged 2.0 and 2.1 versions. + * Initialised ip_masq for 2.0.x version. + * Added explicit NETLINK option for 2.1.x version. + * Added packet and byte counters for policy matches. + * 26-Feb-1998: Fixed race conditions, added SMP support. + * 18-Mar-1998: Fix SMP, fix race condition fix. + * 1-May-1998: Remove caching of device pointer. + * 12-May-1998: Allow tiny fragment case for TCP/UDP. 
+ * 15-May-1998: Treat short packets as fragments, don't just block. + * 3-Jan-1999: Fixed serious procfs security hole -- users should never + * be allowed to view the chains! + * Marc Santoro + * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash + * during dump_packet. --RR. + * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num + * printed in log (modified from Michael Hasenstein's patch). + * Added SYN in log message. --RR + * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998. + * John McDonald + * Thomas Lopatic + */ + +/* + * + * The origina Linux port was done Alan Cox, with changes/fixes from + * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan + * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. + * + * Copyright from the original FreeBSD version follows: + * + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. */ + + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IP_MASQUERADE +#include +#endif + +#include +#include +#include + +/* Understanding locking in this code: (thanks to Alan Cox for using + * little words to explain this to me). -- PR + * + * In UP, there can be two packets traversing the chains: + * 1) A packet from the current userspace context + * 2) A packet off the bh handlers (timer or net). 
*
 * For SMP (kernel v2.1+), multiply this by # CPUs.
 *
 * [Note that this is not correct for 2.2 - because the socket code always
 * uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
 * only run on one CPU at a time. This will probably change for 2.3.
 * It is still good to use spinlocks because that avoids the global cli()
 * for updating the tables, which is rather costly in SMP kernels -AK]
 *
 * This means counters and backchains can get corrupted if no precautions
 * are taken.
 *
 * To actually alter a chain on UP, we need only do a cli(), as this will
 * stop a bh handler firing, as we are in the current userspace context
 * (coming from a setsockopt()).
 *
 * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
 * UP.
 *
 * For backchains and counters, we use an array, indexed by
 * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
 * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So,
 * confident of uniqueness, we modify counters even though we only
 * have a read lock (to read the counters, you need a write lock,
 * though). */

/* Why I didn't use straight locking... -- PR
 *
 * The backchains can be separated out of the ip_chains structure, and
 * allocated as needed inside ip_fw_check().
 *
 * The counters, however, can't. Trying to lock these means blocking
 * interrupts every time we want to access them. This would suck HARD
 * performance-wise. Not locking them leads to possible corruption,
 * made worse on 32-bit machines (counters are 64-bit).
*/ + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ +/*#define DEBUG_IP_FIREWALL_LOCKING*/ + +#ifdef CONFIG_IP_FIREWALL_NETLINK +static struct sock *ipfwsk; +#endif + +#ifdef __SMP__ +#define SLOT_NUMBER() (cpu_number_map[smp_processor_id()]*2 + !in_interrupt()) +#else +#define SLOT_NUMBER() (!in_interrupt()) +#endif +#define NUM_SLOTS (smp_num_cpus*2) + +#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \ + + NUM_SLOTS*sizeof(struct ip_reent)) +#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \ + + NUM_SLOTS*sizeof(struct ip_counters)) + +#ifdef DEBUG_IP_FIREWALL_LOCKING +static unsigned int fwc_rlocks, fwc_wlocks; +#define FWC_DEBUG_LOCK(d) \ +do { \ + FWC_DONT_HAVE_LOCK(d); \ + d |= (1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DEBUG_UNLOCK(d) \ +do { \ + FWC_HAVE_LOCK(d); \ + d &= ~(1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DONT_HAVE_LOCK(d) \ +do { \ + if ((d) & (1 << SLOT_NUMBER())) \ + printk("%s:%i: Got lock on %i already!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while(0) + +#define FWC_HAVE_LOCK(d) \ +do { \ + if (!((d) & (1 << SLOT_NUMBER()))) \ + printk("%s:%i:No lock on %i!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while (0) + +#else +#define FWC_DEBUG_LOCK(d) do { } while(0) +#define FWC_DEBUG_UNLOCK(d) do { } while(0) +#define FWC_DONT_HAVE_LOCK(d) do { } while(0) +#define FWC_HAVE_LOCK(d) do { } while(0) +#endif /*DEBUG_IP_FIRWALL_LOCKING*/ + +#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0) +#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0) +#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0) +#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0) +#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0) +#define 
FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0) +#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0) +#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0) + +struct ip_chain; + +struct ip_counters +{ + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +struct ip_fwkernel +{ + struct ip_fw ipfw; + struct ip_fwkernel *next; /* where to go next if current + * rule doesn't match */ + struct ip_chain *branch; /* which branch to jump to if + * current rule matches */ + int simplebranch; /* Use this if branch == NULL */ + struct ip_counters counters[0]; /* Actually several of these */ +}; + +struct ip_reent +{ + struct ip_chain *prevchain; /* Pointer to referencing chain */ + struct ip_fwkernel *prevrule; /* Pointer to referencing rule */ + struct ip_counters counters; +}; + +struct ip_chain +{ + ip_chainlabel label; /* Defines the label for each block */ + struct ip_chain *next; /* Pointer to next block */ + struct ip_fwkernel *chain; /* Pointer to first rule in block */ + __u32 refcount; /* Number of refernces to block */ + int policy; /* Default rule for chain. Only * + * used in built in chains */ + struct ip_reent reent[0]; /* Actually several of these */ +}; + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) 
+#endif + +/* Lock around ip_fw_chains linked list structure */ +rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED; + +/* Head of linked list of fw rules */ +static struct ip_chain *ip_fw_chains; + +#define IP_FW_INPUT_CHAIN ip_fw_chains +#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next) +#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next) + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +extern inline int port_match(__u16 min, __u16 max, __u16 port, + int frag, int invert) +{ + if (frag) /* Fragments fail ANY port test. */ + return (min == 0 && max == 0xFFFF); + else return (port >= min && port <= max) ^ invert; +} + +/* Returns whether matches rule or not. */ +static int ip_rule_match(struct ip_fwkernel *f, + const char *ifname, + struct iphdr *ip, + char tcpsyn, + __u16 src_port, __u16 dst_port, + char isfrag) +{ +#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg)) + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr, + IP_FW_INV_SRCIP) + || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr, + IP_FW_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, + f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, + f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* + * Look for a VIA device match + */ + if (f->ipfw.fw_flg & IP_FW_F_WILDIF) { + if (FWINV(strncmp(ifname, f->ipfw.fw_vianame, + strlen(f->ipfw.fw_vianame)) != 0, + IP_FW_INV_VIA)) { + dprintf("Wildcard interface mismatch.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA ? 
" (INV)" : ""); + return 0; /* Mismatch */ + } + } + else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0, + IP_FW_INV_VIA)) { + dprintf("Interface name does not match.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA + ? " (INV)" : ""); + return 0; /* Mismatch */ + } + + /* + * Ok the chain addresses match. + */ + + /* If we have a fragment rule but the packet is not a fragment + * the we return zero */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : ""); + return 0; + } + + /* Fragment NEVER passes a SYN test, even an inverted one. */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN) + || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) { + dprintf("Rule requires SYN and packet has no SYN.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : ""); + return 0; + } + + if (f->ipfw.fw_proto) { + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, f->ipfw.fw_proto, + f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":""); + return 0; + } + + /* For non TCP/UDP/ICMP, port range is max anyway. 
*/ + if (!port_match(f->ipfw.fw_spts[0], + f->ipfw.fw_spts[1], + src_port, isfrag, + !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT)) + || !port_match(f->ipfw.fw_dpts[0], + f->ipfw.fw_dpts[1], + dst_port, isfrag, + !!(f->ipfw.fw_invflg + &IP_FW_INV_DSTPT))) { + dprintf("Port match failed.\n"); + return 0; + } + } + + dprintf("Match succeeded.\n"); + return 1; +} + +static const char *branchname(struct ip_chain *branch,int simplebranch) +{ + if (branch) + return branch->label; + switch (simplebranch) + { + case FW_BLOCK: return IP_FW_LABEL_BLOCK; + case FW_ACCEPT: return IP_FW_LABEL_ACCEPT; + case FW_REJECT: return IP_FW_LABEL_REJECT; + case FW_REDIRECT: return IP_FW_LABEL_REDIRECT; + case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE; + case FW_SKIP: return "-"; + case FW_SKIP+1: return IP_FW_LABEL_RETURN; + default: + return "UNKNOWN"; + } +} + +/* + * VERY ugly piece of code which actually + * makes kernel printf for matching packets... + */ +static void dump_packet(const struct iphdr *ip, + const char *ifname, + struct ip_fwkernel *f, + const ip_chainlabel chainlabel, + __u16 src_port, + __u16 dst_port, + unsigned int count, + int syn) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + + if (f) + { + printk(KERN_INFO "Packet log: %s ",chainlabel); + + printk("%s ",branchname(f->branch,f->simplebranch)); + if (f->simplebranch==FW_REDIRECT) + printk("%d ",f->ipfw.fw_redirpt); + } + + printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ifname, ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk(" %s(#%d)\n", 
syn ? "SYN " : /* "PENANCE" */ "", count); +} + +/* function for checking chain labels for user space. */ +static int check_label(ip_chainlabel label) +{ + unsigned int i; + /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */ + for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++) + if (label[i] == '\0') return 1; + + return 0; +} + +/* This function returns a pointer to the first chain with a label + * that matches the one given. */ +static struct ip_chain *find_label(ip_chainlabel label) +{ + struct ip_chain *tmp; + FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); + for (tmp = ip_fw_chains; tmp; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + break; + return tmp; +} + +/* This function returns a boolean which when true sets answer to one + of the FW_*. */ +static int find_special(ip_chainlabel label, int *answer) +{ + if (label[0] == '\0') { + *answer = FW_SKIP; /* => pass-through rule */ + return 1; + } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { + *answer = FW_ACCEPT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { + *answer = FW_BLOCK; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { + *answer = FW_REJECT; + return 1; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { + *answer = FW_REDIRECT; + return 1; +#endif +#ifdef CONFIG_IP_MASQUERADE + } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { + *answer = FW_MASQUERADE; + return 1; +#endif + } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { + *answer = FW_SKIP+1; + return 1; + } else { + return 0; + } +} + +/* This function cleans up the prevchain and prevrule. If the verbose + * flag is set then he names of the chains will be printed as it + * cleans up. 
*/ +static void cleanup(struct ip_chain *chain, + const int verbose, + unsigned int slot) +{ + struct ip_chain *tmpchain = chain->reent[slot].prevchain; + if (verbose) + printk(KERN_ERR "Chain backtrace: "); + while (tmpchain) { + if (verbose) + printk("%s<-",chain->label); + chain->reent[slot].prevchain = NULL; + chain = tmpchain; + tmpchain = chain->reent[slot].prevchain; + } + if (verbose) + printk("%s\n",chain->label); +} + +static inline int +ip_fw_domatch(struct ip_fwkernel *f, + struct iphdr *ip, + const char *rif, + const ip_chainlabel label, + struct sk_buff *skb, + unsigned int slot, + __u16 src_port, __u16 dst_port, + unsigned int count, + int tcpsyn) +{ + f->counters[slot].bcnt+=ntohs(ip->tot_len); + f->counters[slot].pcnt++; + if (f->ipfw.fw_flg & IP_FW_F_PRN) { + dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn); + } + ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; + +/* This functionality is useless in stock 2.0.x series, but we don't + * discard the mark thing altogether, to avoid breaking ipchains (and, + * more importantly, the ipfwadm wrapper) --PR */ + if (f->ipfw.fw_flg & IP_FW_F_MARKABS) + skb->fwmark = f->ipfw.fw_mark; + else + skb->fwmark+=f->ipfw.fw_mark; +#ifdef CONFIG_IP_FIREWALL_NETLINK + if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { + size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) + + sizeof(__u32) + sizeof(skb->fwmark) + IFNAMSIZ; + struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); + + duprintf("Sending packet out NETLINK (length = %u).\n", + (unsigned int)len); + if (outskb) { + /* Prepend length, mark & interface */ + skb_put(outskb, len); + *((__u32 *)outskb->data) = (__u32)len; + *((__u32 *)(outskb->data+sizeof(__u32))) = skb->fwmark; + strcpy(outskb->data+sizeof(__u32)*2, rif); + memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, + len-(sizeof(__u32)*2+IFNAMSIZ)); + netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL); + } + else { + if (net_ratelimit()) + printk(KERN_WARNING "ip_fw: packet drop due 
to " + "netlink failure\n"); + return 0; + } + } +#endif + return 1; +} + +/* + * Returns one of the generic firewall policies, like FW_ACCEPT. + * + * The testing is either false for normal firewall mode or true for + * user checking mode (counters are not updated, TOS & mark not done). + */ +static int +ip_fw_check(struct iphdr *ip, + const char *rif, + __u16 *redirport, + struct ip_chain *chain, + struct sk_buff *skb, + unsigned int slot, + int testing) +{ + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); + struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port = 0xFFFF, dst_port = 0xFFFF; + char tcpsyn=0; + __u16 offset; + unsigned char oldtos; + struct ip_fwkernel *f; + int ret = FW_SKIP+2; + unsigned int count; + + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + + offset = ntohs(ip->frag_off) & IP_OFFSET; + + /* + * Don't allow a fragment of TCP 8 bytes in. Nobody + * normal causes this. Its a cracker trying to break + * in by doing a flag overwrite to pass the direction + * checks. + */ + + if (offset == 1 && ip->protocol == IPPROTO_TCP) { + if (!testing && net_ratelimit()) { + printk("Suspect TCP fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + + /* If we can't investigate ports, treat as fragment. It's + * either a trucated whole packet, or a truncated first + * fragment, or a TCP first fragment of length 8-15, in which + * case the above rule stops reassembly. 
+ */ + if (offset == 0) { + unsigned int size_req; + switch (ip->protocol) { + case IPPROTO_TCP: + /* Don't care about things past flags word */ + size_req = 16; + break; + + case IPPROTO_UDP: + case IPPROTO_ICMP: + size_req = 8; + break; + + default: + size_req = 0; + } + offset = (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req); + + /* If it is a truncated first fragment then it can be + * used to rewrite port information, and thus should + * be blocked. + */ + if (offset && (ntohs(ip->frag_off) & IP_MF)) { + if (!testing && net_ratelimit()) { + printk("Suspect short first fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + } + + src = ip->saddr; + dst = ip->daddr; + oldtos = ip->tos; + + /* + * If we got interface from which packet came + * we can use the address directly. Linux 2.1 now uses address + * chains per device too, but unlike BSD we first check if the + * incoming packet matches a device address and the routing + * table before calling the firewall. + */ + + dprintf("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf("TCP "); + if (!offset) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + + /* Connection initilisation can only + * be made when the syn bit is set and + * neither of the ack or reset is + * set. 
*/ + if(tcp->syn && !(tcp->ack || tcp->rst)) + tcpsyn=1; + } + break; + case IPPROTO_UDP: + dprintf("UDP "); + if (!offset) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + break; + case IPPROTO_ICMP: + if (!offset) { + src_port=(__u16)icmp->type; + dst_port=(__u16)icmp->code; + } + dprintf("ICMP "); + break; + default: + dprintf("p=%d ",ip->protocol); + break; + } +#ifdef DEBUG_IP_FIREWALL + print_ip(ip->saddr); + + if (offset) + dprintf(":fragment (%i) ", ((int)offset)<<2); + else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP + || ip->protocol==IPPROTO_ICMP) + dprintf(":%hu:%hu", src_port, dst_port); + dprintf("\n"); +#endif + + if (!testing) FWC_READ_LOCK(&ip_fw_lock); + else FWC_HAVE_LOCK(fwc_rlocks); + + f = chain->chain; + do { + count = 0; + for (; f; f = f->next) { + count++; + if (ip_rule_match(f,rif,ip, + tcpsyn,src_port,dst_port,offset)) { + if (!testing + && !ip_fw_domatch(f, ip, rif, chain->label, + skb, slot, + src_port, dst_port, + count, tcpsyn)) { + ret = FW_BLOCK; + goto out; + } + break; + } + } + if (f) { + if (f->branch) { + /* Do sanity check to see if we have + * already set prevchain and if so we + * must be in a loop */ + if (f->branch->reent[slot].prevchain) { + if (!testing) { + printk(KERN_ERR + "IP firewall: " + "Loop detected " + "at `%s'.\n", + f->branch->label); + cleanup(chain, 1, slot); + ret = FW_BLOCK; + } else { + cleanup(chain, 0, slot); + ret = FW_SKIP+1; + } + } + else { + f->branch->reent[slot].prevchain + = chain; + f->branch->reent[slot].prevrule + = f->next; + chain = f->branch; + f = chain->chain; + } + } + else if (f->simplebranch == FW_SKIP) + f = f->next; + else if (f->simplebranch == FW_SKIP+1) { + /* Just like falling off the chain */ + goto fall_off_chain; + } + else { + cleanup(chain, 0, slot); + ret = f->simplebranch; + } + } /* f == NULL */ + else { + fall_off_chain: + if (chain->reent[slot].prevchain) { + struct ip_chain *tmp = chain; + f = chain->reent[slot].prevrule; + chain = 
chain->reent[slot].prevchain; + tmp->reent[slot].prevchain = NULL; + } + else { + ret = chain->policy; + if (!testing) { + chain->reent[slot].counters.pcnt++; + chain->reent[slot].counters.bcnt + += ntohs(ip->tot_len); + } + } + } + } while (ret == FW_SKIP+2); + + out: + if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); + + /* Recalculate checksum if not going to reject, and TOS changed. */ + if (ip->tos != oldtos + && ret != FW_REJECT && ret != FW_BLOCK + && !testing) + ip_send_check(ip); + +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (ret == FW_REDIRECT && redirport) { + if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + } +#endif + +#ifdef DEBUG_ALLOW_ALL + return (testing ? ret : FW_ACCEPT); +#else + return ret; +#endif +} + +/* Must have write lock & interrupts off for any of these */ + +/* This function sets all the byte counters in a chain to zero. 
The + * input is a pointer to the chain required for zeroing */ +static int zero_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = chainptr->chain; i; i = i->next) + memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + return 0; +} + +static int clear_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i= chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->chain=NULL; + + while (i) { + struct ip_fwkernel *tmp = i->next; + if (i->branch) + i->branch->refcount--; + kfree(i); + i = tmp; + } + return 0; +} + +static int replace_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + + if (f->branch) f->branch->refcount--; + if (frwl->branch) frwl->branch->refcount++; + + frwl->next = f->next; + memcpy(f,frwl,sizeof(struct ip_fwkernel)); + kfree(frwl); + return 0; +} + +static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Special case if no rules already present */ + if (chainptr->chain == NULL) { + + /* If pointer writes are atomic then turning off + * interupts is not necessary. */ + chainptr->chain = rule; + if (rule->branch) rule->branch->refcount++; + return 0; + } + + /* Find the rule before the end of the chain */ + for (i = chainptr->chain; i->next; i = i->next); + i->next = rule; + if (rule->branch) rule->branch->refcount++; + return 0; +} + +/* This function inserts a rule at the position of position in the + * chain refenced by chainptr. If position is 1 then this rule will + * become the new rule one. 
*/ +static int insert_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + /* special case if the position is number 1 */ + if (position == 1) { + frwl->next = chainptr->chain; + if (frwl->branch) frwl->branch->refcount++; + chainptr->chain = frwl; + return 0; + } + position--; + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + if (frwl->branch) frwl->branch->refcount++; + frwl->next = f->next; + + f->next = frwl; + return 0; +} + +/* This function deletes the a rule from a given rulenum and chain. + * With rulenum = 1 is the first rule is deleted. */ + +static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) +{ + struct ip_fwkernel *i=chainptr->chain,*tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + + if (!chainptr->chain) + return ENOENT; + + /* Need a special case for the first rule */ + if (rulenum == 1) { + /* store temp to allow for freeing up of memory */ + tmp = chainptr->chain; + if (chainptr->chain->branch) chainptr->chain->branch->refcount--; + chainptr->chain = chainptr->chain->next; + kfree(tmp); /* free memory that is now unused */ + } else { + rulenum--; + while (--rulenum && i->next ) i = i->next; + if (!i->next) + return ENOENT; + tmp = i->next; + if (i->next->branch) + i->next->branch->refcount--; + i->next = i->next->next; + kfree(tmp); + } + return 0; +} + + +/* This function deletes the a rule from a given rule and chain. + * The rule that is deleted is the first occursance of that rule. */ +static int del_rule_from_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl) +{ + struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; + int was_found; + + FWC_HAVE_LOCK(fwc_wlocks); + + /* Sure, we should compare marks, but since the `ipfwadm' + * script uses it for an unholy hack... well, life is easier + * this way. We also mask it out of the flags word. 
--PR */ + for (ltmp=NULL, was_found=0; + !was_found && ftmp != NULL; + ltmp = ftmp,ftmp = ftmp->next) { + if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr + || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr + || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr + || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr +#if 0 + || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg +#else + || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) + != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) +#endif + || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg + || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto +#if 0 + || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark +#endif + || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt + || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] + || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] + || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] + || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] + || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { + duprintf("del_rule_from_chain: mismatch:" + "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " + "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " + "mark:%u/%u " + "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " + "outputsize:%hu-%hu\n", + ftmp->ipfw.fw_src.s_addr, + frwl->ipfw.fw_src.s_addr, + ftmp->ipfw.fw_dst.s_addr, + frwl->ipfw.fw_dst.s_addr, + ftmp->ipfw.fw_smsk.s_addr, + frwl->ipfw.fw_smsk.s_addr, + ftmp->ipfw.fw_dmsk.s_addr, + frwl->ipfw.fw_dmsk.s_addr, + ftmp->ipfw.fw_flg, + frwl->ipfw.fw_flg, + ftmp->ipfw.fw_invflg, + frwl->ipfw.fw_invflg, + ftmp->ipfw.fw_proto, + frwl->ipfw.fw_proto, + ftmp->ipfw.fw_mark, + frwl->ipfw.fw_mark, + ftmp->ipfw.fw_spts[0], + frwl->ipfw.fw_spts[0], + ftmp->ipfw.fw_spts[1], + frwl->ipfw.fw_spts[1], + ftmp->ipfw.fw_dpts[0], + frwl->ipfw.fw_dpts[0], + ftmp->ipfw.fw_dpts[1], + frwl->ipfw.fw_dpts[1], + ftmp->ipfw.fw_outputsize, + frwl->ipfw.fw_outputsize); + continue; + } + + if (strncmp(ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame, + IFNAMSIZ)) { + duprintf("del_rule_from_chain: if mismatch: %s/%s\n", + 
ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame); + continue; + } + if (ftmp->branch != frwl->branch) { + duprintf("del_rule_from_chain: branch mismatch: " + "%s/%s\n", + ftmp->branch?ftmp->branch->label:"(null)", + frwl->branch?frwl->branch->label:"(null)"); + continue; + } + if (ftmp->branch == NULL + && ftmp->simplebranch != frwl->simplebranch) { + duprintf("del_rule_from_chain: simplebranch mismatch: " + "%i/%i\n", + ftmp->simplebranch, frwl->simplebranch); + continue; + } + was_found = 1; + if (ftmp->branch) + ftmp->branch->refcount--; + if (ltmp) + ltmp->next = ftmp->next; + else + chainptr->chain = ftmp->next; + kfree(ftmp); + break; + } + + if (was_found) + return 0; + else { + duprintf("del_rule_from_chain: no matching rule found\n"); + return EINVAL; + } +} + +/* This function takes the label of a chain and deletes the first + * chain with that name. No special cases required for the built in + * chains as they have their refcount initilised to 1 so that they are + * never deleted. */ +static int del_chain(ip_chainlabel label) +{ + struct ip_chain *tmp,*tmp2; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Corner case: return EBUSY not ENOENT for first elem ("input") */ + if (strcmp(label, ip_fw_chains->label) == 0) + return EBUSY; + + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if(strcmp(tmp->next->label,label) == 0) + break; + + tmp2 = tmp->next; + if (!tmp2) + return ENOENT; + + if (tmp2->refcount) + return EBUSY; + + if (tmp2->chain) + return ENOTEMPTY; + + tmp->next = tmp2->next; + kfree(tmp2); + return 0; +} + +/* This is a function to initilise a chain. Built in rules start with + * refcount = 1 so that they cannot be deleted. User defined rules + * start with refcount = 0 so they can be deleted. 
*/ +static struct ip_chain *ip_init_chain(ip_chainlabel name, + __u32 ref, + int policy) +{ + unsigned int i; + struct ip_chain *label + = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); + if (label == NULL) + panic("Can't kmalloc for firewall chains.\n"); + strcpy(label->label,name); + label->next = NULL; + label->chain = NULL; + label->refcount = ref; + label->policy = policy; + for (i = 0; i < smp_num_cpus*2; i++) { + label->reent[i].counters.pcnt = label->reent[i].counters.bcnt + = 0; + label->reent[i].prevchain = NULL; + label->reent[i].prevrule = NULL; + } + + return label; +} + +/* This is a function for reating a new chain. The chains is not + * created if a chain of the same name already exists */ +static int create_chain(ip_chainlabel label) +{ + struct ip_chain *tmp; + + if (!check_label(label)) + return EINVAL; + + FWC_HAVE_LOCK(fwc_wlocks); + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is + * zero since this is a + * user defined chain * + * and therefore can be + * deleted */ + return 0; +} + +/* This function simply changes the policy on one of the built in + * chains. checking must be done before this is call to ensure that + * chainptr is pointing to one of the three possible chains */ +static int change_policy(struct ip_chain *chainptr, int policy) +{ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->policy = policy; + return 0; +} + +/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also + * performs some checks in the structure. 
*/
/* On success returns a freshly kmalloc'ed rule that the caller owns.
 * On failure returns NULL, releases anything allocated here, and stores
 * the reason in *errno: EINVAL for a malformed rule, ENOMEM when the
 * allocation fails, ENOENT when the target chain does not exist or is
 * one of the built in chains. */
static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
{
	struct ip_fwkernel *fwkern = NULL;
	int err = EINVAL;
	/* Non-zero when either port range is narrower than 0-0xFFFF */
	int narrows_ports = (fwuser->ipfw.fw_spts[0] != 0
			     || fwuser->ipfw.fw_spts[1] != 0xFFFF
			     || fwuser->ipfw.fw_dpts[0] != 0
			     || fwuser->ipfw.fw_dpts[1] != 0xFFFF);

	if ((fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0) {
		duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
			 fwuser->ipfw.fw_flg);
		goto fail;
	}

#ifdef DEBUG_IP_FIREWALL_USER
	/* Extra sanity checks, only compiled in for user-interface
	 * debugging; each one rejects a self-contradictory rule. */

	/* SYN matching only makes sense for (non-inverted) TCP */
	if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
		|| fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
		duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
		goto fail;
	}

	/* A redirect port needs the redirect target */
	if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
	    && fwuser->ipfw.fw_redirpt != 0) {
		duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
		goto fail;
	}

	/* Inversion of a test which was not requested */
	if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
	     && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
	    || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
		&& (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
		duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
		goto fail;
	}

	/* Inversion of a match-everything test matches nothing */
	if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
	     && fwuser->ipfw.fw_spts[0] == 0
	     && fwuser->ipfw.fw_spts[1] == 0xFFFF)
	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
		&& fwuser->ipfw.fw_dpts[0] == 0
		&& fwuser->ipfw.fw_dpts[1] == 0xFFFF)
	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
		&& (fwuser->ipfw.fw_vianame)[0] == '\0')
	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
		&& fwuser->ipfw.fw_smsk.s_addr == 0)
	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
		&& fwuser->ipfw.fw_dmsk.s_addr == 0)) {
		duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
		goto fail;
	}

	/* A (non-inverted) fragment rule cannot test ports or SYN */
	if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
	    && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
	    && (narrows_ports
		|| (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
		duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
		goto fail;
	}
#endif

	/* Port ranges are only accepted for non-inverted TCP/UDP/ICMP */
	if (narrows_ports
	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
		|| (fwuser->ipfw.fw_proto != IPPROTO_TCP
		    && fwuser->ipfw.fw_proto != IPPROTO_UDP
		    && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
		duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
		goto fail;
	}

	fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL);
	if (!fwkern) {
		duprintf("convert_ipfw: kmalloc failed!\n");
		err = ENOMEM;
		goto fail;
	}
	memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));

	/* The target is either a special one (find_special() fills in
	 * simplebranch) or the label of a user defined chain. */
	if (find_special(fwuser->label, &fwkern->simplebranch)) {
		fwkern->branch = NULL;
	} else {
		fwkern->branch = find_label(fwuser->label);
		if (!fwkern->branch) {
			duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
				 fwuser->label);
			err = ENOENT;
			goto fail;
		}
		if (fwkern->branch == IP_FW_INPUT_CHAIN
		    || fwkern->branch == IP_FW_FORWARD_CHAIN
		    || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
			duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
				 fwuser->label);
			err = ENOENT;
			goto fail;
		}
	}
	memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);

	/* Handle empty vianame by making it a wildcard */
	if ((fwkern->ipfw.fw_vianame)[0] == '\0')
		fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;

	fwkern->next = NULL;
	return fwkern;

fail:
	/* Single failure exit: drop the half-built rule, report why */
	if (fwkern)
		kfree(fwkern);
	*errno = err;
	return NULL;
}

/* Maps the verdict of a test run of ip_fw_check() onto the error code
 * which IP_FW_CHECK returns to its caller. */
static int check_verdict_to_errno(int verdict)
{
	switch (verdict) {
	case FW_ACCEPT:
		return 0;
	case FW_REDIRECT:
		return ECONNABORTED;
	case FW_MASQUERADE:
		return ECONNRESET;
	case FW_REJECT:
		return ECONNREFUSED;
	/* Hack to help diag; these only get returned when testing. */
	case FW_SKIP+1:
		return ELOOP;
	case FW_SKIP:
		return ENFILE;
	default: /* FW_BLOCK */
		return ETIMEDOUT;
	}
}

/* Validates `label' and looks up the chain carrying it.  Returns EINVAL
 * if check_label() rejects the label, ENOENT (with *chainp set to NULL)
 * if find_label() finds nothing, and 0 with *chainp set otherwise. */
static int ctl_find_chain(void *label, struct ip_chain **chainp)
{
	if (!check_label(label))
		return EINVAL;

	*chainp = find_label(label);
	if (*chainp == NULL)
		return ENOENT;

	return 0;
}

/* Dispatches one firewall control request.  `m' points at the request
 * structure belonging to `cmd' and `len' must equal that structure's
 * size.  Returns 0 or a positive error code.  Everything except
 * IP_FW_CHECK runs with the write lock taken on entry; IP_FW_CHECK
 * drops it and uses the read lock instead. */
int ip_fw_ctl(int cmd, void *m, int len)
{
	int ret;
	struct ip_chain *chain;
	unsigned long flags;

	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);

	switch (cmd) {
	case IP_FW_FLUSH:
		if (len != sizeof(ip_chainlabel))
			ret = EINVAL;
		else {
			ret = ctl_find_chain(m, &chain);
			if (ret == 0)
				ret = clear_fw_chain(chain);
		}
		break;

	case IP_FW_ZERO:
		if (len != sizeof(ip_chainlabel))
			ret = EINVAL;
		else {
			ret = ctl_find_chain(m, &chain);
			if (ret == 0)
				ret = zero_fw_chain(chain);
		}
		break;

	case IP_FW_CHECK: {
		struct ip_fwtest *test = m;
		struct iphdr *ip;

		/* Don't need write lock. */
		FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);

		if (len != sizeof(struct ip_fwtest) || !check_label(m))
			return EINVAL;

		/* Need readlock to do find_label */
		FWC_READ_LOCK(&ip_fw_lock);

		chain = find_label(test->fwt_label);
		ip = &(test->fwt_packet.fwp_iph);

		if (chain == NULL)
			ret = ENOENT;
		else if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
			/* Only option-less headers can be tested */
			duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
				 ip->ihl,
				 sizeof(struct iphdr) / sizeof(int));
			ret = EINVAL;
		} else {
			ret = ip_fw_check(ip, test->fwt_packet.fwp_vianame,
					  NULL, chain,
					  NULL, SLOT_NUMBER(), 1);
			ret = check_verdict_to_errno(ret);
		}

		FWC_READ_UNLOCK(&ip_fw_lock);
		return ret;
	}

	case IP_FW_MASQ_TIMEOUTS:
#ifdef CONFIG_IP_MASQUERADE
		ret = ip_fw_masq_timeouts(m, len);
#else
		ret = EINVAL;
#endif
		break;

	case IP_FW_REPLACE: {
		struct ip_fwnew *req = m;
		struct ip_fwkernel *rule;

		if (len != sizeof(struct ip_fwnew)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwn_label, &chain);
		if (ret != 0)
			break;
		/* convert_ipfw() stores the error in ret when it fails */
		rule = convert_ipfw(&req->fwn_rule, &ret);
		if (rule != NULL)
			ret = replace_in_chain(chain, rule,
					       req->fwn_rulenum);
		break;
	}

	case IP_FW_APPEND: {
		struct ip_fwchange *req = m;
		struct ip_fwkernel *rule;

		if (len != sizeof(struct ip_fwchange)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwc_label, &chain);
		if (ret != 0)
			break;
		rule = convert_ipfw(&req->fwc_rule, &ret);
		if (rule != NULL)
			ret = append_to_chain(chain, rule);
		break;
	}

	case IP_FW_INSERT: {
		struct ip_fwnew *req = m;
		struct ip_fwkernel *rule;

		if (len != sizeof(struct ip_fwnew)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwn_label, &chain);
		if (ret != 0)
			break;
		rule = convert_ipfw(&req->fwn_rule, &ret);
		if (rule != NULL)
			ret = insert_in_chain(chain, rule,
					      req->fwn_rulenum);
		break;
	}

	case IP_FW_DELETE: {
		struct ip_fwchange *req = m;
		struct ip_fwkernel *rule;

		if (len != sizeof(struct ip_fwchange)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwc_label, &chain);
		if (ret != 0)
			break;
		rule = convert_ipfw(&req->fwc_rule, &ret);
		if (rule != NULL) {
			/* The converted rule only serves as the pattern
			 * to match against, so it is freed again. */
			ret = del_rule_from_chain(chain, rule);
			kfree(rule);
		}
		break;
	}

	case IP_FW_DELETE_NUM: {
		struct ip_fwdelnum *req = m;

		if (len != sizeof(struct ip_fwdelnum)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwd_label, &chain);
		if (ret == 0)
			ret = del_num_from_chain(chain, req->fwd_rulenum);
		break;
	}

	case IP_FW_CREATECHAIN:
		if (len == sizeof(ip_chainlabel))
			ret = create_chain(m);
		else {
			duprintf("create_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		break;

	case IP_FW_DELETECHAIN:
		if (len == sizeof(ip_chainlabel))
			ret = del_chain(m);
		else {
			duprintf("delete_chain: bad size %i\n", len);
			ret = EINVAL;
		}
		break;

	case IP_FW_POLICY: {
		struct ip_fwpolicy *req = m;
		int pol = FW_SKIP;

		if (len != sizeof(struct ip_fwpolicy)) {
			ret = EINVAL;
			break;
		}
		ret = ctl_find_chain(req->fwp_label, &chain);
		if (ret != 0)
			break;

		/* Only the three built in chains carry a policy */
		if (chain != IP_FW_INPUT_CHAIN
		    && chain != IP_FW_FORWARD_CHAIN
		    && chain != IP_FW_OUTPUT_CHAIN) {
			duprintf("change_policy: can't change policy on user"
				 " defined chain.\n");
			ret = EINVAL;
			break;
		}

		/* pol keeps its FW_SKIP default unless find_special()
		 * stores something else for this policy name. */
		find_special(req->fwp_policy, &pol);

		if (pol == FW_MASQUERADE && chain != IP_FW_FORWARD_CHAIN) {
			/* Masquerading is a forward-chain-only policy */
			ret = EINVAL;
		} else if (pol == FW_MASQUERADE
			   || pol == FW_BLOCK
			   || pol == FW_ACCEPT
			   || pol == FW_REJECT) {
			ret = change_policy(chain, pol);
		} else {
			duprintf("change_policy: bad policy `%s'\n",
				 req->fwp_policy);
			ret = EINVAL;
		}
		break;
	}

	default:
		duprintf("ip_fw_ctl: unknown request %d\n",cmd);
		ret = EINVAL;
	}

	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
	return ret;
}

/* Formats one rule as a single text line at `buffer' and returns the
 * number of characters sprintf() reports, i.e. not counting the NUL it
 * appends.  The per-slot counters are added up first and each 64-bit
 * total is printed as two 32-bit halves, high half first. */
static int dump_rule(char *buffer,
		     const char *chainlabel,
		     const struct ip_fwkernel *rule)
{
	__u64 packets = 0, bytes = 0;
	__u32 packets_hi, packets_lo, bytes_hi, bytes_lo;
	unsigned int slot;
	int len;

	FWC_HAVE_LOCK(fwc_wlocks);

	for (slot = 0; slot < NUM_SLOTS; slot++) {
		packets += rule->counters[slot].pcnt;
		bytes += rule->counters[slot].bcnt;
	}
	packets_hi = (__u32)(packets >> 32);
	packets_lo = (__u32)packets;
	bytes_hi = (__u32)(bytes >> 32);
	bytes_lo = (__u32)bytes;

	len = sprintf(buffer,
		      "%9s "			/* Chain name */
		      "%08lX/%08lX->%08lX/%08lX " /* Source & Destination IPs */
		      "%.16s "			/* Interface */
		      "%X %X "			/* fw_flg and fw_invflg fields */
		      "%u "			/* Protocol */
		      "%-9u %-9u %-9u %-9u "	/* Packet & byte counters */
		      "%u-%u %u-%u "		/* Source & Dest port ranges */
		      "A%02X X%02X "		/* TOS and and xor masks */
		      "%08X "			/* Redirection port */
		      "%u "			/* fw_mark field */
		      "%u "			/* output size */
		      "%9s\n",			/* Target */
		      chainlabel,
		      ntohl(rule->ipfw.fw_src.s_addr),
		      ntohl(rule->ipfw.fw_smsk.s_addr),
		      ntohl(rule->ipfw.fw_dst.s_addr),
		      ntohl(rule->ipfw.fw_dmsk.s_addr),
		      (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
		      rule->ipfw.fw_flg,
		      rule->ipfw.fw_invflg,
		      rule->ipfw.fw_proto,
		      packets_hi, packets_lo,
		      bytes_hi, bytes_lo,
		      rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
		      rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
		      rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
		      rule->ipfw.fw_redirpt,
		      rule->ipfw.fw_mark,
		      rule->ipfw.fw_outputsize,
		      branchname(rule->branch,rule->simplebranch));

	duprintf("dump_rule: %i bytes done.\n", len);
	return len;
}

/* File offset is actually in records, not bytes.
*/ +static int ip_chain_procinfo(char *buffer, char **start, + off_t offset, int length, int reset) +{ + struct ip_chain *i; + struct ip_fwkernel *j = ip_fw_chains->chain; + unsigned long flags; + int len = 0; + int last_len = 0; + off_t upto = 0; + + duprintf("Offset starts at %lu\n", offset); + duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { + for (j = i->chain; j; j = j->next) { + if (upto == offset) break; + duprintf("Skipping rule in chain `%s'\n", + i->label); + upto++; + } + if (upto == offset) break; + } + + /* Don't init j first time, or once i = NULL */ + for (; i; (void)((i = i->next) && (j = i->chain))) { + duprintf("Dumping chain `%s'\n", i->label); + for (; j; j = j->next, upto++, last_len = len) + { + len += dump_rule(buffer+len, i->label, j); + if (len > length) { + duprintf("Dumped to %i (past %i). " + "Moving back to %i.\n", + len, length, last_len); + len = last_len; + goto outside; + } + else if (reset) + memset(j->counters, 0, + sizeof(struct ip_counters)*NUM_SLOTS); + } + } +outside: + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + buffer[len] = '\0'; + + duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", + len, length, upto); + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start=(char *)((unsigned int)upto-offset); + return len; +} + +static int ip_chain_name_procinfo(char *buffer, char **start, + off_t offset, int length, int reset) +{ + struct ip_chain *i; + int len = 0,last_len = 0; + off_t pos = 0,begin = 0; + unsigned long flags; + + /* Need a write lock to lock out ``readers'' which update counters. 
*/ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) + { + unsigned int j; + __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; + + for (j = 0; j < NUM_SLOTS; j++) { + packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; + packetsHi += ((i->reent[j].counters.pcnt >> 32) + & 0xFFFFFFFF); + bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; + bytesHi += ((i->reent[j].counters.bcnt >> 32) + & 0xFFFFFFFF); + } + + /* print the label and the policy */ + len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", + i->label,branchname(NULL, i->policy),i->refcount, + packetsHi, packetsLo, bytesHi, bytesLo); + pos=begin+len; + if(posoffset+length) { + len = last_len; + break; + } + + last_len = len; + } + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + *start = buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* + * Interface to the generic firewall chains. + */ +int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + /* Locally generated bogus packets by root. . 
*/ + if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr) + || (*pskb)->len < sizeof(struct iphdr)) + return FW_ACCEPT; + return ip_fw_check(phdr, dev->name, + arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +struct firewall_ops ipfw_ops= +{ + NULL, + ipfw_forward_check, + ipfw_input_check, + ipfw_output_check, + PF_INET, + 0 /* We don't even allow a fall through so we are last */ +}; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry proc_net_ipfwchains_chain = { + PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, + IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_procinfo +}; + +static struct proc_dir_entry proc_net_ipfwchains_chainnames = { + PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, + IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0, + 0, &proc_net_inode_operations, ip_chain_name_procinfo +}; + +#endif + +__initfunc(void ip_fw_init(void)) +{ +#ifdef DEBUG_IP_FIRWALL_LOCKING + fwc_wlocks = fwc_rlocks = 0; +#endif + + IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); + IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); + IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); + + if(register_firewall(PF_INET,&ipfw_ops)<0) + panic("Unable to register IP firewall.\n"); + +#ifdef CONFIG_PROC_FS + proc_net_register(&proc_net_ipfwchains_chain); + proc_net_register(&proc_net_ipfwchains_chainnames); +#endif + +#ifdef CONFIG_IP_FIREWALL_NETLINK + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); + if (ipfwsk == NULL) + panic("ip_fw_init: cannot initialize netlink\n"); +#endif +#if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER) + printk("Firewall graphs enabled! 
Untested kernel coming thru. \n"); +#endif +} -- cgit v1.2.3