diff --git a/Makefile b/Makefile index b38abe9adef8..6a7492473a0d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 8 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Psychotic Stoned Sheep diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c3c12efe0bc0..9c0c8fd0b292 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv_and_exit; if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) @@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7ac6b62fb7c1..05c770825386 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -802,8 +802,10 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -819,8 +821,11 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -830,13 +835,30 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -1160,6 +1182,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; diff --git a/block/blk-map.c b/block/blk-map.c index b8657fa8dc9a..27fd8d92892d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -118,6 +118,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct iov_iter i; int ret; + if (!iter_is_iovec(iter)) + goto fail; + if (map_data) copy = true; else if (iov_iter_alignment(iter) & align) @@ -140,6 +143,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unmap_rq: __blk_rq_unmap_user(bio); +fail: rq->bio = NULL; return -EINVAL; } diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bda37d336736..b081929e80bc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -904,9 +904,10 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, vl->members |= BIT(port) | BIT(cpu_port); if (untagged) - vl->untag |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port); else - vl->untag &= ~(BIT(port) | BIT(cpu_port)); + vl->untag &= ~BIT(port); + vl->untag &= ~BIT(cpu_port); b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); @@ -915,8 +916,6 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, if (pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), vlan->vid_end); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), - vlan->vid_end); b53_fast_age_vlan(dev, vid); } } @@ -926,7 +925,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, { struct b53_device *dev = ds_to_priv(ds); bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; u16 vid; u16 pvid; @@ -939,8 +937,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); vl->members &= ~BIT(port); - if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) - vl->members = 0; if (pvid == vid) { if (is5325(dev) || is5365(dev)) @@ -949,18 +945,14 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, pvid = 0; } - if (untagged) { + if (untagged) vl->untag &= ~(BIT(port)); - if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port)) - vl->untag = 0; - } b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); } b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid); b53_fast_age_vlan(dev, pvid); return 0; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b2b838724a9b..4036865b7c08 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1167,6 +1167,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct ethtool_eee *p = &priv->port_sts[port].eee; u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg; @@ -1241,6 +1242,9 @@ force_link: reg |= DUPLX_MODE; core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + + if (!phydev->is_pseudo_fixed_link) + p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); } static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 541456398dfb..842d8b90484e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1172,6 +1172,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; unsigned int pkts_compl = 0; @@ -1199,13 +1200,13 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, if (tx_cb_ptr->skb) { pkts_compl++; bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent; - dma_unmap_single(&dev->dev, + dma_unmap_single(kdev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); bcmgenet_free_cb(tx_cb_ptr); } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { - dma_unmap_page(&dev->dev, + dma_unmap_page(kdev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); @@ -1775,6 +1776,7 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv, static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) { + struct device *kdev = &priv->pdev->dev; struct enet_cb *cb; int i; @@ -1782,7 +1784,7 @@ static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) cb = &priv->rx_cbs[i]; if (dma_unmap_addr(cb, dma_addr)) { - dma_unmap_single(&priv->dev->dev, + dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr), priv->rx_buf_len, DMA_FROM_DEVICE); dma_unmap_addr_set(cb, dma_addr, 0); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index d954a97b0b0b..ef0dbcfa6ce6 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -959,6 +959,7 @@ static inline void macb_init_rx_ring(struct macb *bp) addr += bp->rx_buffer_size; } bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); + bp->rx_tail = 0; } static int macb_rx(struct macb *bp, int budget) @@ -1597,8 +1598,6 @@ static void macb_init_rings(struct macb *bp) bp->queues[0].tx_head = 0; bp->queues[0].tx_tail = 0; bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); - - bp->rx_tail = 0; } static void macb_reset_hw(struct macb *bp) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 467138b423d3..d747e17d3429 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume); static void sky2_shutdown(struct pci_dev *pdev) { + struct sky2_hw *hw = pci_get_drvdata(pdev); + int port; + + for (port = 0; port < hw->ports; port++) { + struct net_device *ndev = hw->dev[port]; + + rtnl_lock(); + if (netif_running(ndev)) { + dev_close(ndev); + netif_device_detach(ndev); + } + rtnl_unlock(); + } sky2_suspend(&pdev->dev); pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev)); pci_set_power_state(pdev, PCI_D3hot); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 054e795df90f..92c9a95169c8 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -518,7 +518,7 @@ static struct sh_eth_cpu_data r7s72100_data = { .ecsr_value = ECSR_ICD, .ecsipr_value = ECSIPR_ICDIP, - .eesipr_value = 0xff7f009f, + .eesipr_value = 0xe77f009f, .tx_check = EESR_TC1 | EESR_FTC, .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 16af1ce99233..5ad706b99af8 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -844,7 +844,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs4 = geneve->sock4; struct rtable *rt = NULL; - const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; struct flowi4 fl4; __u8 tos, ttl; @@ -871,8 +870,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); skb_reset_mac_header(skb); - iip = ip_hdr(skb); - if (info) { const struct ip_tunnel_key *key = &info->key; u8 *opts = NULL; @@ -892,7 +889,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto tx_error; - tos = ip_tunnel_ecn_encap(key->tos, iip, skb); + tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; } else { @@ -901,7 +898,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto tx_error; - tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); + tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb); ttl = geneve->ttl; if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) ttl = 1; @@ -934,7 +931,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; - const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; struct flowi6 fl6; __u8 prio, ttl; @@ -959,8 +955,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); skb_reset_mac_header(skb); - iip = ip_hdr(skb); - if (info) { const struct ip_tunnel_key *key = &info->key; u8 *opts = NULL; @@ -981,7 +975,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto tx_error; - prio = ip_tunnel_ecn_encap(key->tos, iip, skb); + prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; label = info->key.label; } else { @@ -991,7 +985,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, goto tx_error; prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), - iip, skb); + ip_hdr(skb), skb); ttl = geneve->ttl; if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) ttl = 1; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index c47ec0a04c8e..dd623f674487 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -388,12 +388,6 @@ void usbnet_cdc_status(struct usbnet *dev, struct urb *urb) case USB_CDC_NOTIFY_NETWORK_CONNECTION: netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? "on" : "off"); - - /* Work-around for devices with broken off-notifications */ - if (event->wValue && - !test_bit(__LINK_STATE_NOCARRIER, &dev->net->state)) - usbnet_link_change(dev, 0, 0); - usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: /* tx/rx rates */ @@ -466,6 +460,36 @@ static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } +/* Ensure correct link state + * + * Some devices (ZTE MF823/831/910) export two carrier on notifications when + * connected. This causes the link state to be incorrect. Work around this by + * always setting the state to off, then on. + */ +void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) +{ + struct usb_cdc_notification *event; + + if (urb->actual_length < sizeof(*event)) + return; + + event = urb->transfer_buffer; + + if (event->bNotificationType != USB_CDC_NOTIFY_NETWORK_CONNECTION) { + usbnet_cdc_status(dev, urb); + return; + } + + netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", + event->wValue ? "on" : "off"); + + if (event->wValue && + netif_carrier_ok(dev->net)) + netif_carrier_off(dev->net); + + usbnet_link_change(dev, !!event->wValue, 0); +} + static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, @@ -481,7 +505,7 @@ static const struct driver_info zte_cdc_info = { .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = usbnet_cdc_zte_bind, .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, + .status = usbnet_cdc_zte_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, .rx_fixup = usbnet_cdc_zte_rx_fixup, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index bf3fd34924bd..d8072092e5f0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1468,6 +1468,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) netif_napi_del(&vi->rq[i].napi); } + /* We called napi_hash_del() before netif_napi_del(), + * we need to respect an RCU grace period before freeing vi->rq + */ + synchronize_net(); + kfree(vi->rq); kfree(vi->sq); } diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..944e7baf17eb 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -102,12 +102,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); -static inline size_t iov_iter_count(struct iov_iter *i) +static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } -static inline bool iter_is_iovec(struct iov_iter *i) +static inline bool iter_is_iovec(const struct iov_iter *i) { return !(i->type & (ITER_BVEC | ITER_KVEC)); } diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } diff --git a/net/core/flow.c b/net/core/flow.c index 3937b1b68d5b..18e8893d4be5 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -95,7 +95,6 @@ static void flow_cache_gc_task(struct work_struct *work) list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); atomic_dec(&xfrm->flow_cache_gc_count); - WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); } } @@ -236,9 +235,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); - if (fcp->hash_count > 2 * fc->high_watermark || - atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { - atomic_inc(&net->xfrm.flow_cache_genid); + if (atomic_read(&net->xfrm.flow_cache_gc_count) > + 2 * num_online_cpus() * fc->high_watermark) { flo = ERR_PTR(-ENOBUFS); goto ret_object; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..2e9a1c2818c7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 189cc78c77eb..08c3702f7074 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1578,7 +1578,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) - continue; + goto cont; if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -2791,7 +2791,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, diff --git a/net/core/sock.c b/net/core/sock.c index 10acaccca5c8..ba27920b6bbc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -715,7 +715,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -751,7 +751,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b567c8725aea..edbe59d203ef 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -700,6 +700,7 @@ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; unsigned int cscov; + u8 dccph_doff; if (skb->pkt_type != PACKET_HOST) return 1; @@ -721,18 +722,19 @@ int dccp_invalid_packet(struct sk_buff *skb) /* * If P.Data Offset is too small for packet type, drop packet and return */ - if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); + dccph_doff = dh->dccph_doff; + if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); return 1; } /* * If P.Data Offset is too too large for packet, drop packet and return */ - if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); + if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { + DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); return 1; } - + dh = dccp_hdr(skb); /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f30bad9678f0..3bdecd2f9b88 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -28,8 +28,10 @@ static struct dsa_switch_tree *dsa_get_dst(u32 tree) struct dsa_switch_tree *dst; list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->tree == tree) + if (dst->tree == tree) { + kref_get(&dst->refcount); return dst; + } return NULL; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eebbc0f2baa8..ed22af67c58a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1237,7 +1237,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ - if (fixedid && !(iph->frag_off & htons(IP_DF))) + if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) goto out; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d95631d09248..20fb25e3027b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -476,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1b25daf8c7f1..9301308528f8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -157,7 +157,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old, int fib_unmerge(struct net *net) { - struct fib_table *old, *new; + struct fib_table *old, *new, *main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); @@ -168,11 +168,21 @@ int fib_unmerge(struct net *net) if (!new) return -ENOMEM; + /* table is already unmerged */ + if (new == old) + return 0; + /* replace merged table with clean table */ - if (new != old) { - fib_replace_table(net, old, new); - fib_free_table(old); - } + fib_replace_table(net, old, new); + fib_free_table(old); + + /* attempt to fetch main table if it has been allocated */ + main_table = fib_get_table(net, RT_TABLE_MAIN); + if (!main_table) + return 0; + + /* flush local entries from main table */ + fib_table_flush_external(main_table); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7ef703102dca..84fd727274cf 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -681,6 +681,13 @@ static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; + unsigned char slen_max; + + /* only vector 0 can have a suffix length greater than or equal to + * tn->pos + tn->bits, the second highest node will have a suffix + * length at most of tn->pos + tn->bits - 1 + */ + slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); /* search though the list of children looking for nodes that might * have a suffix greater than the one we currently have. This is @@ -698,12 +705,8 @@ static unsigned char update_suffix(struct key_vector *tn) slen = n->slen; i &= ~(stride - 1); - /* if slen covers all but the last bit we can stop here - * there will be nothing longer than that since only node - * 0 and 1 << (bits - 1) could have that as their suffix - * length. - */ - if ((slen + 1) >= (tn->pos + tn->bits)) + /* stop searching if we have hit the maximum possible value */ + if (slen >= slen_max) break; } @@ -875,39 +878,27 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn) return collapse(t, tn); /* update parent in case halve failed */ - tp = node_parent(tn); - - /* Return if at least one deflate was run */ - if (max_work != MAX_WORK) - return tp; - - /* push the suffix length to the parent node */ - if (tn->slen > tn->pos) { - unsigned char slen = update_suffix(tn); - - if (slen > tp->slen) - tp->slen = slen; - } - - return tp; + return node_parent(tn); } -static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) +static void node_pull_suffix(struct key_vector *tn, unsigned char slen) { - while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { - if (update_suffix(tp) > l->slen) + unsigned char node_slen = tn->slen; + + while ((node_slen > tn->pos) && (node_slen > slen)) { + slen = update_suffix(tn); + if (node_slen == slen) break; - tp = node_parent(tp); + + tn = node_parent(tn); + node_slen = tn->slen; } } -static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) +static void node_push_suffix(struct key_vector *tn, unsigned char slen) { - /* if this is a new leaf then tn will be NULL and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - while (tn->slen < l->slen) { - tn->slen = l->slen; + while (tn->slen < slen) { + tn->slen = slen; tn = node_parent(tn); } } @@ -1028,6 +1019,7 @@ static int fib_insert_node(struct trie *t, struct key_vector *tp, } /* Case 3: n is NULL, and will just insert a new leaf */ + node_push_suffix(tp, new->fa_slen); NODE_INIT_PARENT(l, tp); put_child_root(tp, key, l); trie_rebalance(t, tp); @@ -1069,7 +1061,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, /* if we added to the tail node then we need to update slen */ if (l->slen < new->fa_slen) { l->slen = new->fa_slen; - leaf_push_suffix(tp, l); + node_push_suffix(tp, new->fa_slen); } return 0; @@ -1470,6 +1462,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, * out parent suffix lengths as a part of trie_rebalance */ if (hlist_empty(&l->leaf)) { + if (tp->slen == l->slen) + node_pull_suffix(tp, tp->pos); put_child_root(tp, l->key, NULL); node_free(l); trie_rebalance(t, tp); @@ -1482,7 +1476,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, /* update the trie with the latest suffix length */ l->slen = fa->fa_slen; - leaf_pull_suffix(tp, l); + node_pull_suffix(tp, fa->fa_slen); } /* Caller must hold RTNL. */ @@ -1713,8 +1707,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) local_l = fib_find_node(lt, &local_tp, l->key); if (fib_insert_alias(lt, local_tp, local_l, new_fa, - NULL, l->key)) + NULL, l->key)) { + kmem_cache_free(fn_alias_kmem, new_fa); goto out; + } } /* stop loop if key wrapped back to 0 */ @@ -1751,6 +1747,10 @@ void fib_table_flush_external(struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); @@ -1826,6 +1826,10 @@ int fib_table_flush(struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 307daed9a4b9..f4790c30c543 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -98,6 +98,9 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + skb->protocol = htons(ETH_P_IP); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 66ddcb60519a..dcdd5aed7eb1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -662,6 +662,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, if (len > 0xFFFF) return -EMSGSIZE; + /* Must have at least a full ICMP header. */ + if (len < icmph_len) + return -EINVAL; + /* * Check the flags. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c0d71e7d663e..a2d54f5b0fe0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1451,7 +1451,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7e0fe4bdd967..feb50a16398d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2eea073e27ef..705d9fbf0bcf 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -50,7 +50,7 @@ struct proto udplite_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f5432d65e6bf..8f2e36f8afd9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -163,7 +163,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -2893,6 +2893,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); + rt_genid_bump_ipv6(dev_net(idev->dev)); ipv6_ifa_notify(RTM_NEWADDR, ifp); in6_ifa_put(ifp); } @@ -3736,7 +3737,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; - bool notify = false; + bool bump_id, notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3751,11 +3752,12 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); return; } @@ -3815,8 +3817,8 @@ static void addrconf_dad_work(struct work_struct *w) struct inet6_ifaddr, dad_work); struct inet6_dev *idev = ifp->idev; + bool bump_id, disable_ipv6 = false; struct in6_addr mcaddr; - bool disable_ipv6 = false; enum { DAD_PROCESS, @@ -3886,11 +3888,12 @@ static void addrconf_dad_work(struct work_struct *w) * DAD was successful */ + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); goto out; } @@ -3927,7 +3930,7 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) return true; } -static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; @@ -3978,6 +3981,9 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } + + if (bump_id) + rt_genid_bump_ipv6(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..111ba55fd512 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a09418bda1f8..93294cf4525c 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,7 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, segs = ops->callbacks.gso_segment(skb, features); } - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out; for (skb = segs; skb; skb = skb->next) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 41489f39c456..da4e7b377812 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1014,6 +1014,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + bool use_cache = false; int err = -1; /* NBMA tunnel */ @@ -1038,7 +1039,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1113,7 +1122,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst); @@ -1134,7 +1143,6 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, if (err) return err; - skb->protocol = htons(ETH_P_IPV6); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c2..1d184322a7b1 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + skb->protocol = htons(ETH_P_IPV6); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c2a8656c22eb..fa39ab8ec1fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ out: return; } -static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0682c031ccdc..3c1dbc9f74cf 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); void udp_v6_clear_sk(struct sock *sk, int size); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fd6ef414899b..af2895c77ed6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,7 +45,7 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4ccd159f..d0e906d39642 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) @@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) read_unlock_bh(&l2tp_ip_lock); lock_sock(sk); + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea2ae6664cc8..b9c6a412b806 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 62bea4591054..246f29d365c0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -329,7 +329,6 @@ static void netlink_sock_destruct(struct sock *sk) if (nlk->cb_running) { if (nlk->cb.done) nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -346,6 +345,14 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } +static void netlink_sock_destruct_work(struct work_struct *work) +{ + struct netlink_sock *nlk = container_of(work, struct netlink_sock, + work); + + sk_free(&nlk->sk); +} + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -648,8 +655,18 @@ out_module: static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + struct sock *sk = &nlk->sk; + + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } - sock_put(&nlk->sk); + sk_free(sk); } static int netlink_release(struct socket *sock) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 3cfd6cc60504..4fdb38318977 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -3,6 +3,7 @@ #include #include +#include #include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -33,6 +34,7 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; + struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d2238b204691..dd2332390c45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { net_warn_ratelimited("Tx-ring is not supported.\n"); @@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b54d56d4959b..cf9b2fe8eac6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,6 +108,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) kfree(keys); } +static bool offset_valid(struct sk_buff *skb, int offset) +{ + if (offset > 0 && offset > skb->len) + return false; + + if (offset < 0 && -offset > skb_headroom(skb)) + return false; + + return true; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -134,6 +145,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (tkey->offmask) { char *d, _d; + if (!offset_valid(skb, off + tkey->at)) { + pr_info("tc filter pedit 'at' offset %d out of bounds\n", + off + tkey->at); + goto bad; + } d = skb_header_pointer(skb, off + tkey->at, 1, &_d); if (!d) @@ -146,10 +162,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, " offset must be on 32 bit boundaries\n"); goto bad; } - if (offset > 0 && offset > skb->len) { - pr_info("tc filter pedit" - " offset %d can't exceed pkt length %d\n", - offset, skb->len); + + if (!offset_valid(skb, off + offset)) { + pr_info("tc filter pedit offset %d out of bounds\n", + offset); goto bad; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..1bf1f4517db6 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; - if (head == NULL) - return 0UL; - list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { l = (unsigned long) f; @@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force) tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c3002c2c68bb..dbec45848fe1 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -200,7 +200,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) call_rcu(&prog->rcu, __cls_bpf_delete_prog); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } @@ -211,9 +210,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) struct cls_bpf_prog *prog; unsigned long ret = 0UL; - if (head == NULL) - return 0UL; - list_for_each_entry(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..c104c2019feb 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) if (!force) return false; - - if (head) { - RCU_INIT_POINTER(tp->root, NULL); + /* Head can still be NULL due to cls_cgroup_init(). */ + if (head) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); - } + return true; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..d7ba2b4ff0f3 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5060801a2f6d..a411571c4d4a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,10 @@ struct cls_fl_head { bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct cls_fl_filter { @@ -239,6 +243,24 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } +static void fl_destroy_sleepable(struct work_struct *work) +{ + struct cls_fl_head *head = container_of(work, struct cls_fl_head, + work); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree(head); + module_put(THIS_MODULE); +} + +static void fl_destroy_rcu(struct rcu_head *rcu) +{ + struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); + + INIT_WORK(&head->work, fl_destroy_sleepable); + schedule_work(&head->work); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -252,10 +274,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); - kfree_rcu(head, rcu); + + __module_get(THIS_MODULE); + call_rcu(&head->rcu, fl_destroy_rcu); return true; } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 25927b6c4436..f935429bd5ef 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -114,7 +114,6 @@ static bool mall_destroy(struct tcf_proto *tp, bool force) call_rcu(&f->rcu, mall_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..9992dfac6938 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; nhptr = ip_hdr(skb); #endif - + if (unlikely(!head)) + return -1; restart: #if RSVP_DST_LEN == 4 diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..403746b20263 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); return true; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b1bbf133bd..616769983bcd 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -402,6 +402,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; + if (tipc_mtu_bad(dev, 0)) { + dev_put(dev); + return -EINVAL; + } /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); @@ -606,8 +610,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!b) return NOTIFY_DONE; - b->mtu = dev->mtu; - switch (evt) { case NETDEV_CHANGE: if (netif_carrier_ok(dev)) @@ -621,6 +623,11 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_reset_bearer(net, b); break; case NETDEV_CHANGEMTU: + if (tipc_mtu_bad(dev, 0)) { + bearer_disable(net, b); + break; + } + b->mtu = dev->mtu; tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 43757f1f9cb3..d93f1f1a21e6 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -39,6 +39,7 @@ #include "netlink.h" #include "core.h" +#include "msg.h" #include #define MAX_MEDIA 3 @@ -59,6 +60,9 @@ #define TIPC_MEDIA_TYPE_IB 2 #define TIPC_MEDIA_TYPE_UDP 3 +/* minimum bearer MTU */ +#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE) + /** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) @@ -213,4 +217,13 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +/* check if device MTU is too low for tipc headers */ +static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) +{ + if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) + return false; + netdev_warn(dev, "MTU too low for tipc bearer\n"); + return true; +} + #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ae7e14cae085..f60f346e75b3 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -372,6 +372,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.local_ip.s_addr = htonl(INADDR_ANY); udp_conf.use_udp_checksums = false; ub->ifindex = dev->ifindex; + if (tipc_mtu_bad(dev, sizeof(struct iphdr) + + sizeof(struct udphdr))) { + err = -EINVAL; + goto err; + } b->mtu = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8309687a56b0..568f307afdcf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2199,7 +2199,8 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len) + struct sk_buff *last, unsigned int last_len, + bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); @@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - timeo = freezable_schedule_timeout(timeo); + if (freezable) + timeo = freezable_schedule_timeout(timeo); + else + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) @@ -2250,7 +2254,8 @@ struct unix_stream_read_state { unsigned int splice_flags; }; -static int unix_stream_read_generic(struct unix_stream_read_state *state) +static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; @@ -2330,7 +2335,7 @@ again: mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, - last_len); + last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, true); } static ssize_t skb_unix_socket_splice(struct sock *sk, @@ -2518,7 +2523,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode)