diff options
Diffstat (limited to 'net')
28 files changed, 865 insertions, 317 deletions
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 17fd5f2cb4b8..98de6e7fd86d 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -65,8 +65,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) return false; - if (!(info->bitmask & ( EBT_IP6_DPORT | - EBT_IP6_SPORT | EBT_IP6_ICMP6))) + if (!(info->bitmask & (EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0ad639a96142..152300d164ac 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -36,14 +36,12 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par) return 0; } -struct tcpudphdr -{ +struct tcpudphdr { __be16 src; __be16 dst; }; -struct arppayload -{ +struct arppayload { unsigned char mac_src[ETH_ALEN]; unsigned char ip_src[4]; unsigned char mac_dst[ETH_ALEN]; @@ -152,7 +150,8 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, - * then log the ARP payload */ + * then log the ARP payload + */ if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && ah->ar_pln == sizeof(__be32)) { diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 0c40570069ba..6b731e12ecfa 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -41,7 +41,7 @@ struct stp_config_pdu { #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) static bool ebt_filter_config(const struct ebt_stp_info *info, - const struct stp_config_pdu *stpc) + const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; uint16_t v16; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 618568888128..98c221dbf059 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -66,7 +66,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) * - Canonical Format Indicator (CFI). The Canonical Format Indicator * (CFI) is a single bit flag value. Currently ignored. * - VLAN Identifier (VID). The VID is encoded as - * an unsigned binary number. */ + * an unsigned binary number. + */ id = TCI & VLAN_VID_MASK; prio = (TCI >> 13) & 0x7; @@ -98,7 +99,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for bitmask range - * True if even one bit is out of mask */ + * True if even one bit is out of mask + */ if (info->bitmask & ~EBT_VLAN_MASK) { pr_debug("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); @@ -117,7 +119,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) * 0 - The null VLAN ID. * 1 - The default Port VID (PVID) * 0x0FFF - Reserved for implementation use. - * if_vlan.h: VLAN_N_VID 4096. */ + * if_vlan.h: VLAN_N_VID 4096. + */ if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ if (info->id > VLAN_N_VID) { @@ -128,7 +131,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, * but should be ignored according to 802.1Q Std. - * So we just drop the prio flag. */ + * So we just drop the prio flag. + */ info->bitmask &= ~EBT_VLAN_PRIO; } /* Else, id=0 (null VLAN ID) => user_priority range (any?) */ @@ -143,7 +147,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for encapsulated proto range - it is possible to be * any value for u_short range. - * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ + * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS + */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { pr_debug("encap frame length %d is less than " diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 32eccd101f26..593a1bdc079e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -12,7 +12,7 @@ #include <linux/module.h> #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ - (1 << NF_BR_LOCAL_OUT)) + (1 << NF_BR_LOCAL_OUT)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index ec55358f00c8..eb33919821ee 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -12,7 +12,7 @@ #include <linux/module.h> #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ - (1 << NF_BR_POST_ROUTING)) + (1 << NF_BR_POST_ROUTING)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f46ca417bf2d..67b2e27999aa 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -35,8 +35,7 @@ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -/* - * Each cpu has its own set of counters, so there is no need for write_lock in +/* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to * get a write_lock @@ -46,7 +45,7 @@ #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ - COUNTER_OFFSET(n) * cpu)) + COUNTER_OFFSET(n) * cpu)) @@ -126,7 +125,7 @@ ebt_dev_check(const char *entry, const struct net_device *device) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out) + const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; @@ -162,7 +161,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_source[i] ^ e->sourcemac[i]) & e->sourcemsk[i]; - if (FWINV2(verdict != 0, EBT_ISOURCE) ) + if (FWINV2(verdict != 0, EBT_ISOURCE)) return 1; } if (e->bitmask & EBT_DESTMAC) { @@ -170,7 +169,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_dest[i] ^ e->destmac[i]) & e->destmsk[i]; - if (FWINV2(verdict != 0, EBT_IDEST) ) + if (FWINV2(verdict != 0, EBT_IDEST)) return 1; } return 0; @@ -237,7 +236,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us - what to do with the packet */ + * what to do with the packet + */ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) @@ -323,7 +323,7 @@ letscontinue: /* If it succeeds, returns element and locks mutex */ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, - struct mutex *mutex) + struct mutex *mutex) { struct { struct list_head list; @@ -342,7 +342,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, - int *error, struct mutex *mutex) + int *error, struct mutex *mutex) { return try_then_request_module( find_inlist_lock_noload(head, name, error, mutex), @@ -451,7 +451,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { if (e->bitmask != 0) { /* we make userspace set this right, - so there is no misunderstanding */ + * so there is no misunderstanding + */ BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " "in distinguisher\n"); return -EINVAL; @@ -487,15 +488,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, return 0; } -/* - * this one is very careful, as it is the first function +/* this one is very careful, as it is the first function * to parse the userspace data */ static inline int ebt_check_entry_size_and_hooks(const struct ebt_entry *e, - const struct ebt_table_info *newinfo, - unsigned int *n, unsigned int *cnt, - unsigned int *totalcnt, unsigned int *udc_cnt) + const struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) { int i; @@ -504,10 +504,12 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, break; } /* beginning of a new chain - if i == NF_BR_NUMHOOKS it must be a user defined chain */ + * if i == NF_BR_NUMHOOKS it must be a user defined chain + */ if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries - as it said it has */ + * as it said it has + */ if (*n != *cnt) { BUGPRINT("nentries does not equal the nr of entries " "in the chain\n"); @@ -549,20 +551,18 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, return 0; } -struct ebt_cl_stack -{ +struct ebt_cl_stack { struct ebt_chainstack cs; int from; unsigned int hookmask; }; -/* - * we need these positions to check that the jumps to a different part of the +/* We need these positions to check that the jumps to a different part of the * entries is a jump to the beginning of a new chain. */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, - unsigned int *n, struct ebt_cl_stack *udc) + unsigned int *n, struct ebt_cl_stack *udc) { int i; @@ -649,9 +649,9 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) static inline int ebt_check_entry(struct ebt_entry *e, struct net *net, - const struct ebt_table_info *newinfo, - const char *name, unsigned int *cnt, - struct ebt_cl_stack *cl_s, unsigned int udc_cnt) + const struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, + struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; struct xt_target *target; @@ -673,7 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, BUGPRINT("Unknown flag for inv bitmask\n"); return -EINVAL; } - if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) { + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { BUGPRINT("NOPROTO & 802_3 not allowed\n"); return -EINVAL; } @@ -687,7 +687,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, break; } /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on - a base chain */ + * a base chain + */ if (i < NF_BR_NUMHOOKS) hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); else { @@ -758,13 +759,12 @@ cleanup_matches: return ret; } -/* - * checks for loops and sets the hook mask for udc +/* checks for loops and sets the hook mask for udc * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, - unsigned int udc_cnt, unsigned int hooknr, char *base) + unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; const struct ebt_entry *e = (struct ebt_entry *)chain->data; @@ -853,7 +853,8 @@ static int translate_table(struct net *net, const char *name, return -EINVAL; } /* make sure chains are ordered after each other in same order - as their corresponding hooks */ + * as their corresponding hooks + */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; @@ -868,7 +869,8 @@ static int translate_table(struct net *net, const char *name, i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ k = 0; /* holds the total nr. of entries, should equal - newinfo->nentries afterwards */ + * newinfo->nentries afterwards + */ udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_check_entry_size_and_hooks, newinfo, @@ -888,10 +890,12 @@ static int translate_table(struct net *net, const char *name, } /* get the location of the udc, put them in an array - while we're at it, allocate the chainstack */ + * while we're at it, allocate the chainstack + */ if (udc_cnt) { /* this will get free'd in do_replace()/ebt_register_table() - if an error occurs */ + * if an error occurs + */ newinfo->chainstack = vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack))); if (!newinfo->chainstack) @@ -932,14 +936,15 @@ static int translate_table(struct net *net, const char *name, } /* we now know the following (along with E=mc²): - - the nr of entries in each chain is right - - the size of the allocated space is right - - all valid hooks have a corresponding chain - - there are no loops - - wrong data can still be on the level of a single entry - - could be there are jumps to places that are not the - beginning of a chain. This can only occur in chains that - are not accessible from any base chains, so we don't care. */ + * - the nr of entries in each chain is right + * - the size of the allocated space is right + * - all valid hooks have a corresponding chain + * - there are no loops + * - wrong data can still be on the level of a single entry + * - could be there are jumps to places that are not the + * beginning of a chain. This can only occur in chains that + * are not accessible from any base chains, so we don't care. + */ /* used to know what we need to clean up if something goes wrong */ i = 0; @@ -955,7 +960,7 @@ static int translate_table(struct net *net, const char *name, /* called under write_lock */ static void get_counters(const struct ebt_counter *oldcounters, - struct ebt_counter *counters, unsigned int nentries) + struct ebt_counter *counters, unsigned int nentries) { int i, cpu; struct ebt_counter *counter_base; @@ -986,7 +991,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, struct ebt_table *t; /* the user wants counters back - the check on the size is done later, when we have the lock */ + * the check on the size is done later, when we have the lock + */ if (repl->num_counters) { unsigned long size = repl->num_counters * sizeof(*counterstmp); counterstmp = vmalloc(size); @@ -1038,9 +1044,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, write_unlock_bh(&t->lock); mutex_unlock(&ebt_mutex); /* so, a user can change the chains while having messed up her counter - allocation. Only reason why this is done is because this way the lock - is held only once, while this doesn't bring the kernel into a - dangerous state. */ + * allocation. Only reason why this is done is because this way the lock + * is held only once, while this doesn't bring the kernel into a + * dangerous state. + */ if (repl->num_counters && copy_to_user(repl->counters, counterstmp, repl->num_counters * sizeof(struct ebt_counter))) { @@ -1342,13 +1349,14 @@ static int update_counters(struct net *net, const void __user *user, } static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)m - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes - long. Copy 29 bytes and fill remaining bytes with zeroes. */ + * long. Copy 29 bytes and fill remaining bytes with zeroes. + */ strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; @@ -1356,19 +1364,19 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, } static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) + if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -static inline int -ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) +static inline int ebt_make_names(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; @@ -1394,9 +1402,9 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) } static int copy_counters_to_user(struct ebt_table *t, - const struct ebt_counter *oldcounters, - void __user *user, unsigned int num_counters, - unsigned int nentries) + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) { struct ebt_counter *counterstmp; int ret = 0; @@ -1427,7 +1435,7 @@ static int copy_counters_to_user(struct ebt_table *t, /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - const int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; const struct ebt_counter *oldcounters; @@ -1595,8 +1603,7 @@ static int ebt_compat_entry_padsize(void) static int ebt_compat_match_offset(const struct xt_match *match, unsigned int userlen) { - /* - * ebt_among needs special handling. The kernel .matchsize is + /* ebt_among needs special handling. The kernel .matchsize is * set to -1 at registration time; at runtime an EBT_ALIGN()ed * value is expected. * Example: userspace sends 4500, ebt_among.c wants 4504. @@ -1966,8 +1973,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, return off + match_size; } -/* - * return size of all matches, watchers or target, including necessary +/* return size of all matches, watchers or target, including necessary * alignment and padding. */ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, @@ -2070,8 +2076,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, if (ret < 0) return ret; buf_start = (char *) entry; - /* - * 0: matches offset, always follows ebt_entry. + /* 0: matches offset, always follows ebt_entry. * 1: watchers offset, from ebt_entry structure * 2: target offset, from ebt_entry structure * 3: next ebt_entry offset, from ebt_entry structure @@ -2115,8 +2120,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return 0; } -/* - * repl->entries_size is the size of the ebt_entry blob in userspace. +/* repl->entries_size is the size of the ebt_entry blob in userspace. * It might need more memory when copied to a 64 bit kernel in case * userspace is 32-bit. So, first task: find out how much memory is needed. * @@ -2305,7 +2309,7 @@ static int compat_do_ebt_set_ctl(struct sock *sk, break; default: ret = -EINVAL; - } + } return ret; } @@ -2360,8 +2364,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: - /* - * try real handler first in case of userland-side padding. + /* try real handler first in case of userland-side padding. * in case we are dealing with an 'ordinary' 32 bit binary * without 64bit compatibility padding, this will fail right * after copy_from_user when the *len argument is validated. diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a21269b83f16..4b901d9f2e7c 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index c747b2d9eb77..b6ea57ec5e14 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -14,7 +14,6 @@ #include <net/netfilter/ipv4/nf_reject.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> -#include <net/netfilter/ipv4/nf_reject.h> const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *_oth, int hook) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bab4441ed4e4..e4347aeb2e65 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; int offset; - struct sk_buff *orig; }; #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) @@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} - static void nf_ct_frag6_expire(unsigned long data) { struct frag_queue *fq; @@ -369,17 +362,18 @@ err: /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. + * + * returns true if *prev skb has been transformed into the reassembled + * skb, false otherwise. */ -static struct sk_buff * -nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) +static bool +nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; u8 ecn; @@ -390,22 +384,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - goto out_fail; + return false; /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - pr_debug("payload len is too large.\n"); - goto out_oversize; + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); + return false; } /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) { - pr_debug("skb is cloned but can't expand head"); - goto out_oom; - } + if (skb_unclone(head, GFP_ATOMIC)) + return false; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -416,7 +409,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone = alloc_skb(0, GFP_ATOMIC); if (clone == NULL) - goto out_oom; + return false; clone->next = head->next; head->next = clone; @@ -430,10 +423,41 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->csum = 0; clone->ip_summed = head->ip_summed; - NFCT_FRAG6_CB(clone)->orig = NULL; add_frag_mem_limit(fq->q.net, clone->truesize); } + /* morph head into last received skb: prev. + * + * This allows callers of ipv6 conntrack defrag to continue + * to use the last skb(frag) passed into the reasm engine. + * The last skb frag 'silently' turns into the full reassembled skb. + * + * Since prev is also part of q->fragments we have to clone it first. + */ + if (head != prev) { + struct sk_buff *iter; + + fp = skb_clone(prev, GFP_ATOMIC); + if (!fp) + return false; + + fp->next = prev->next; + + iter = head; + while (iter) { + if (iter->next == prev) { + iter->next = fp; + break; + } + iter = iter->next; + } + + skb_morph(prev, head); + prev->next = head->next; + consume_skb(head); + head = prev; + } + /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; @@ -474,31 +498,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ - fp = skb_shinfo(head)->frag_list; - if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) - /* at above code, head skb is divided into two skbs. */ - fp = fp->next; - - op = NFCT_FRAG6_CB(head)->orig; - for (; fp; fp = fp->next) { - struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; - - op->next = orig; - op = orig; - NFCT_FRAG6_CB(fp)->orig = NULL; - } - - return head; - -out_oversize: - net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", - payload_len); - goto out_fail; -out_oom: - net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); -out_fail: - return NULL; + return true; } /* @@ -564,89 +564,61 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { - struct sk_buff *clone; struct net_device *dev = skb->dev; + int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int fhoff, nhoff; u8 prevhdr; - struct sk_buff *ret_skb = NULL; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return skb; + return -EINVAL; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return skb; + return -EINVAL; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone == NULL) { - pr_debug("Can't clone skb\n"); - return skb; - } + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) + return -ENOMEM; - NFCT_FRAG6_CB(clone)->orig = skb; - - if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - pr_debug("message is too short.\n"); - goto ret_orig; - } - - skb_set_transport_header(clone, fhoff); - hdr = ipv6_hdr(clone); - fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_set_transport_header(skb, fhoff); + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; + return -ENOMEM; } spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock_bh(&fq->q.lock); - pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); - goto ret_orig; + if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { + ret = -EINVAL; + goto out_unlock; } + /* after queue has assumed skb ownership, only 0 or -EINPROGRESS + * must be returned. + */ + ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, dev); - if (ret_skb == NULL) - pr_debug("Can't reassemble fragmented packets\n"); - } - spin_unlock_bh(&fq->q.lock); + fq->q.meat == fq->q.len && + nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; +out_unlock: + spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q, &nf_frags); - return ret_skb; - -ret_orig: - kfree_skb(clone); - return skb; + return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); -void nf_ct_frag6_consume_orig(struct sk_buff *skb) -{ - struct sk_buff *s, *s2; - - for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - s2 = s->next; - s->next = NULL; - consume_skb(s); - s = s2; - } -} -EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); - static int nf_ct_net_init(struct net *net) { int res; @@ -681,7 +653,6 @@ int nf_ct_frag6_init(void) nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; - nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 4fdbed5ebfb6..f7aab5ab93a5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct sk_buff *reasm; + int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ @@ -63,23 +63,13 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(state->net, skb, - nf_ct6_defrag_user(state->hook, skb)); + err = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ - if (reasm == NULL) + if (err == -EINPROGRESS) return NF_STOLEN; - /* error occurred or not fragmented */ - if (reasm == skb) - return NF_ACCEPT; - - nf_ct_frag6_consume_orig(reasm); - - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, - state->in, state->out, - state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - - return NF_STOLEN; + return NF_ACCEPT; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index e0f922b777e3..4709f657b7b6 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -14,7 +14,6 @@ #include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> -#include <net/netfilter/ipv6/nf_reject.h> const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36b498c..22934846b5d1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index acf5c7b3f378..278927ab0948 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, &exp_file_ops); if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ return 0; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 1fb3cacc04e1..0f1a45bcacb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = { static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; + kuid_t root_uid; + kgid_t root_gid; pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(pde, root_uid, root_gid); + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609d9d29..26e742006c48 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include <net/netfilter/nf_conntrack_timeout.h> struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2cb429d34c03..4a23f77c363a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4441,22 +4441,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4567,7 +4567,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a497408..e9f8dffcc244 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,22 +16,17 @@ #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/netfilter.h> +#include <linux/static_key.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", +static const char *const comments[__NFT_TRACETYPE_MAX] = { + [NFT_TRACETYPE_POLICY] = "policy", + [NFT_TRACETYPE_RETURN] = "return", + [NFT_TRACETYPE_RULE] = "rule", }; static struct nf_loginfo trace_loginfo = { @@ -44,22 +39,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace_types type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!info->trace || !pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (static_branch_unlikely(&nft_trace_enabled)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +130,11 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +164,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +175,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +189,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +198,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +213,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 000000000000..e9e959f65d91 --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,275 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/static_key.h> +#include <linux/hash.h> +#include <linux/jhash.h> +#include <linux/if_vlan.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. + */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 77afe913d03d..9ed453465167 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +#define nfnl_dereference_protected(id) \ + rcu_dereference_protected(table[(id)].subsys, \ + lockdep_nfnl_is_held((id))) + static char __initdata nfversion[] = "0.30"; static struct { @@ -49,6 +53,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) @@ -207,8 +212,7 @@ replay: } else { rcu_read_unlock(); nfnl_lock(subsys_id); - if (rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)) != ss || + if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -296,15 +300,13 @@ replay: return netlink_ack(oskb, nlh, -ENOMEM); nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) { #ifdef CONFIG_MODULES nfnl_unlock(subsys_id); request_module("nfnetlink-subsys-%d", subsys_id); nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) #endif { diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e1c462..3921d544f5ba 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. - */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 740cce4685ac..70b6bd3b781e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) return status; } -static int +static void nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) { spin_lock_bh(&inst->lock); inst->flushtimeout = timeout; spin_unlock_bh(&inst->lock); - - return 0; } -static int +static void nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) { spin_lock_bh(&inst->lock); inst->qthreshold = qthresh; spin_unlock_bh(&inst->lock); - - return 0; } static int @@ -1064,15 +1060,26 @@ static int __net_init nfnl_log_net_init(struct net *net) { unsigned int i; struct nfnl_log_net *log = nfnl_log_pernet(net); +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; +#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&log->instance_table[i]); spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - net->nf.proc_netfilter, &nful_file_ops)) + proc = proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops); + if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1915cab7f32d..fe885bf271c5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,12 +18,16 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/smp.h> +#include <linux/static_key.h> #include <net/dst.h> #include <net/sock.h> #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nft_meta.h> +#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -188,6 +192,13 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); +/* don't change or set _LOOPBACK, _USER, etc. */ +static bool pkt_type_ok(u32 p) +{ + return p == PACKET_HOST || p == PACKET_BROADCAST || + p == PACKET_MULTICAST || p == PACKET_OTHERHOST; +} + void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -203,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr, case NFT_META_PRIORITY: skb->priority = value; break; + case NFT_META_PKTTYPE: + if (skb->pkt_type != value && + pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb->pkt_type = value; + break; case NFT_META_NFTRACE: skb->nf_trace = 1; break; @@ -271,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); +static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx) +{ + unsigned int hooks; + + switch (ctx->afi->family) { + case NFPROTO_BRIDGE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -288,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx, case NFT_META_NFTRACE: len = sizeof(u8); break; + case NFT_META_PKTTYPE: + err = nft_meta_set_init_pkttype(ctx); + if (err) + return err; + len = sizeof(u8); + break; default: return -EOPNOTSUPP; } @@ -297,6 +337,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); + return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +377,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +401,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07eb676..12cd4bf16d17 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad747ea9..c8a0b7da5ff4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/audit.h> +#include <linux/user_namespace.h> #include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> @@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; #endif if (af >= ARRAY_SIZE(xt_prefix)) @@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, (void *)(unsigned long)af); if (!proc) goto out; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); @@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_tables; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); @@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_matches; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e53b59..6669e68d589e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 54eaeb45ce99..a086a914865f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); -static int cgroup_mt_check(const struct xt_mtchk_param *par) +static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_cgroup_info *info = par->matchinfo; + struct xt_cgroup_info_v0 *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; @@ -34,10 +34,41 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info("xt_cgroup: both path and classid specified\n"); + return -EINVAL; + } + + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info("xt_cgroup: invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_cgroup_info *info = par->matchinfo; + const struct xt_cgroup_info_v0 *info = par->matchinfo; if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; @@ -46,27 +77,67 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v1 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + + if (!skb->sk || !sk_fullsock(skb->sk)) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + +static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v0, + .match = cgroup_mt_v0, + .matchsize = sizeof(struct xt_cgroup_info_v0), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, + { + .name = "cgroup", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v1, + .match = cgroup_mt_v1, + .matchsize = sizeof(struct xt_cgroup_info_v1), + .destroy = cgroup_mt_destroy_v1, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3e8892216f94..8c106d144f06 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -305,10 +305,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + int err; if (key->eth.type == htons(ETH_P_IP)) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); err = ip_defrag(net, skb, user); @@ -319,28 +319,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(net, skb, user); - if (!reasm) - return -EINPROGRESS; - - if (skb == reasm) { - kfree_skb(skb); - return -EINVAL; - } - - /* Don't free 'skb' even though it is one of the original - * fragments, as we're going to morph it into the head. - */ - skb_get(skb); - nf_ct_frag6_consume_orig(reasm); + err = nf_ct_frag6_gather(net, skb, user); + if (err) + return err; - key->ip.proto = ipv6_hdr(reasm)->nexthdr; - skb_morph(skb, reasm); - skb->next = reasm->next; - consume_skb(reasm); + key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { |