diff --git a/Makefile b/Makefile index 633b5f0f11a0..65c7c8756803 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 19 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Diseased Newt diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 66781bf34077..c72412415093 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -36,12 +36,6 @@ * of type casting from pmd_t * to pte_t *. */ -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pud_huge(pud_t pud) { return 0; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 023747bf4dd7..2de9d2e59d96 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -38,12 +38,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) } #endif -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 76069c18ee42..52b7604b5215 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -114,12 +114,6 @@ int pud_huge(pud_t pud) return 0; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) -{ - return NULL; -} - void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 3c32075d2945..7ca80ac42ed5 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -94,12 +94,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return pmd_page_shift(pmd) > PAGE_SHIFT; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 4ec8ee10d371..06e0f421b41b 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -68,12 +68,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; @@ -83,15 +77,3 @@ int pud_huge(pud_t pud) { return (pud_val(pud) & _PAGE_HUGE) != 0; } - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 620d0ec93e6f..7e408bfc7948 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -714,6 +714,14 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 3c80d2e38f03..210ffede0153 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -192,12 +192,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { if (!MACHINE_HAS_HPAGE) @@ -210,17 +204,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmdp, int write) -{ - struct page *page; - - if (!MACHINE_HAS_HPAGE) - return NULL; - - page = pmd_page(*pmdp); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index d7762349ea48..534bc978af8a 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -67,12 +67,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return 0; @@ -82,9 +76,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d329537739c6..4242eab12e10 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -215,12 +215,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return entry; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return 0; @@ -230,9 +224,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 3270e0019266..8416240c322c 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -150,12 +150,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return NULL; } -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE); @@ -166,28 +160,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_HUGE_PAGE); } -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d4c58d884838..312446418b36 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2404,8 +2404,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST; + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2419,6 +2418,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) } else nested_vmx_ept_caps = 0; + if (enable_unrestricted_guest) + nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_UNRESTRICTED_GUEST; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 006cc914994b..9161f764121e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -52,20 +52,8 @@ int pud_huge(pud_t pud) return 0; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} #else -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - /* * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry. diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0dceba1a2ba1..68ad39a4b221 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3797,7 +3797,8 @@ static inline int bond_slave_override(struct bonding *bond, /* Find out if any slaves have the same mapping as this skb. */ bond_for_each_slave_rcu(bond, slave, iter) { if (slave->queue_id == skb->queue_mapping) { - if (bond_slave_can_tx(slave)) { + if (bond_slave_is_up(slave) && + slave->link == BOND_LINK_UP) { bond_dev_queue_xmit(bond, skb, slave->dev); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index c3a6072134f5..2559206d8704 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -531,20 +531,8 @@ struct bnx2x_fastpath { struct napi_struct napi; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int state; -#define BNX2X_FP_STATE_IDLE 0 -#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */ -#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */ -#define BNX2X_FP_STATE_DISABLED (1 << 2) -#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */ -#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */ -#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL) -#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD) -#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED) -#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD) - /* protect state */ - spinlock_t lock; -#endif /* CONFIG_NET_RX_BUSY_POLL */ + unsigned long busy_poll_state; +#endif union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ @@ -619,104 +607,83 @@ struct bnx2x_fastpath { #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) #ifdef CONFIG_NET_RX_BUSY_POLL -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) + +enum bnx2x_fp_state { + BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */ + + BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */ + BNX2X_STATE_FP_NAPI_REQ = BIT(1), + + BNX2X_STATE_FP_POLL_BIT = 2, + BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */ + + BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */ +}; + +static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) { - spin_lock_init(&fp->lock); - fp->state = BNX2X_FP_STATE_IDLE; + WRITE_ONCE(fp->busy_poll_state, 0); } /* called from the device poll routine to get ownership of a FP */ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) { - bool rc = true; - - spin_lock_bh(&fp->lock); - if (fp->state & BNX2X_FP_LOCKED) { - WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); - fp->state |= BNX2X_FP_STATE_NAPI_YIELD; - rc = false; - } else { - /* we don't care if someone yielded */ - fp->state = BNX2X_FP_STATE_NAPI; + unsigned long prev, old = READ_ONCE(fp->busy_poll_state); + + while (1) { + switch (old) { + case BNX2X_STATE_FP_POLL: + /* make sure bnx2x_fp_lock_poll() wont starve us */ + set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, + &fp->busy_poll_state); + /* fallthrough */ + case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ: + return false; + default: + break; + } + prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI); + if (unlikely(prev != old)) { + old = prev; + continue; + } + return true; } - spin_unlock_bh(&fp->lock); - return rc; } -/* returns true is someone tried to get the FP while napi had it */ -static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) { - bool rc = false; - - spin_lock_bh(&fp->lock); - WARN_ON(fp->state & - (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD)); - - if (fp->state & BNX2X_FP_STATE_POLL_YIELD) - rc = true; - - /* state ==> idle, unless currently disabled */ - fp->state &= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); - return rc; + smp_wmb(); + fp->busy_poll_state = 0; } /* called from bnx2x_low_latency_poll() */ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) { - bool rc = true; - - spin_lock_bh(&fp->lock); - if ((fp->state & BNX2X_FP_LOCKED)) { - fp->state |= BNX2X_FP_STATE_POLL_YIELD; - rc = false; - } else { - /* preserve yield marks */ - fp->state |= BNX2X_FP_STATE_POLL; - } - spin_unlock_bh(&fp->lock); - return rc; + return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0; } -/* returns true if someone tried to get the FP while it was locked */ -static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) { - bool rc = false; - - spin_lock_bh(&fp->lock); - WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); - - if (fp->state & BNX2X_FP_STATE_POLL_YIELD) - rc = true; - - /* state ==> idle, unless currently disabled */ - fp->state &= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); - return rc; + smp_mb__before_atomic(); + clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state); } -/* true if a socket is polling, even if it did not get the lock */ +/* true if a socket is polling */ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) { - WARN_ON(!(fp->state & BNX2X_FP_OWNED)); - return fp->state & BNX2X_FP_USER_PEND; + return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL; } /* false if fp is currently owned */ static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) { - int rc = true; - - spin_lock_bh(&fp->lock); - if (fp->state & BNX2X_FP_OWNED) - rc = false; - fp->state |= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); + set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state); + return !bnx2x_fp_ll_polling(fp); - return rc; } #else -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) { } @@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) return true; } -static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) { - return false; } static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) @@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) return false; } -static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) { - return false; } static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e468ed3f210f..2b8e8b2ce0b6 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - bnx2x_fp_init_lock(&bp->fp[i]); + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - bnx2x_fp_init_lock(&bp->fp[i]); + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) } } + bnx2x_fp_unlock_napi(fp); + /* Fall out from the NAPI loop if needed */ - if (!bnx2x_fp_unlock_napi(fp) && - !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { /* No need to update SB for FCoE L2 ring as long as * it's connected to the default SB and the SB diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 96bf01ba32dd..05ae12690117 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17868,8 +17868,10 @@ static int tg3_init_one(struct pci_dev *pdev, */ if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { + tg3_full_lock(tp, 0); tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); + tg3_full_unlock(tp); } err = tg3_test_dma(tp); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..9842bf963648 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -585,7 +585,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * on the host, we deprecate the error message for this * specific command/input_mod/opcode_mod/fw-status to be debug. */ - if (op == MLX4_CMD_SET_PORT && in_modifier == 1 && + if (op == MLX4_CMD_SET_PORT && + (in_modifier == 1 || in_modifier == 2) && op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE) mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ac6a8f1eea6c..2617c9d68d9b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2627,13 +2627,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_carrier_off(dev); mlx4_en_set_default_moderation(priv); - err = register_netdev(dev); - if (err) { - en_err(priv, "Netdev registration failed for port %d\n", port); - goto out; - } - priv->registered = 1; - en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); @@ -2673,6 +2666,14 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); + err = register_netdev(dev); + if (err) { + en_err(priv, "Netdev registration failed for port %d\n", port); + goto out; + } + + priv->registered = 1; + return 0; out: diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2f398fa4b9e6..24c028473383 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4305,10 +4305,16 @@ static int rocker_port_master_changed(struct net_device *dev) struct net_device *master = netdev_master_upper_dev_get(dev); int err = 0; + /* There are currently three cases handled here: + * 1. Joining a bridge + * 2. Leaving a previously joined bridge + * 3. Other, e.g. being added to or removed from a bond or openvswitch, + * in which case nothing is done + */ if (master && master->rtnl_link_ops && !strcmp(master->rtnl_link_ops->kind, "bridge")) err = rocker_port_bridge_join(rocker_port, master); - else + else if (rocker_port_is_bridged(rocker_port)) err = rocker_port_bridge_leave(rocker_port); return err; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 10f9e4021b5a..9a409a8f3b19 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1368,7 +1368,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, &peeked, &off, &err); if (!skb) - return 0; + return err; ret = tun_put_user(tun, tfile, skb, to); if (unlikely(ret < 0)) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 5c55f11572ba..75d6f26729a3 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } + + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 80a844e0ae03..c3e4da9e79ca 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1172,17 +1172,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - dev->net->stats.tx_packets += ctx->tx_curr_frame_num; /* keep private stats: framing overhead and number of NTBs */ ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; ctx->tx_ntbs++; - /* usbnet has already counted all the framing overhead. + /* usbnet will count all the framing overhead by default. * Adjust the stats so that the tx_bytes counter show real * payload data instead. */ - dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + usbnet_set_skb_tx_stats(skb_out, n, + ctx->tx_curr_frame_payload - skb_out->len); return skb_out; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index b94a0fbb8b3b..953de13267df 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3a6770a65d78..e7ed2513b1d1 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1189,8 +1189,7 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - if (!(dev->driver_info->flags & FLAG_MULTI_PACKET)) - dev->net->stats.tx_packets++; + dev->net->stats.tx_packets += entry->packets; dev->net->stats.tx_bytes += entry->length; } else { dev->net->stats.tx_errors++; @@ -1348,7 +1347,19 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, } else urb->transfer_flags |= URB_ZERO_PACKET; } - entry->length = urb->transfer_buffer_length = length; + urb->transfer_buffer_length = length; + + if (info->flags & FLAG_MULTI_PACKET) { + /* Driver has set number of packets and a length delta. + * Calculate the complete length and ensure that it's + * positive. + */ + entry->length += length; + if (WARN_ON_ONCE(entry->length <= 0)) + entry->length = length; + } else { + usbnet_set_skb_tx_stats(skb, 1, length); + } spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a8c755dcab14..6c83846f914c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1578,12 +1578,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, int err; bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); - skb = udp_tunnel_handle_offloads(skb, udp_sum); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } - skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len @@ -1603,6 +1597,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, goto err; } + skb = udp_tunnel_handle_offloads(skb, udp_sum); + if (IS_ERR(skb)) { + err = -EINVAL; + goto err; + } + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; @@ -1628,10 +1628,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, int err; bool udp_sum = !vs->sock->sk->sk_no_check_tx; - skb = udp_tunnel_handle_offloads(skb, udp_sum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); @@ -1647,6 +1643,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, if (WARN_ON(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, udp_sum); + if (IS_ERR(skb)) + return PTR_ERR(skb); + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index a5186bb7c63e..8c45cf44ce24 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -578,6 +578,13 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) else entry = (u8 *)(&ring->desc[ring->idx]); + if (rtlpriv->cfg->ops->get_available_desc && + rtlpriv->cfg->ops->get_available_desc(hw, prio) <= 1) { + RT_TRACE(rtlpriv, (COMP_INTR | COMP_SEND), DBG_DMESG, + "no available desc!\n"); + return; + } + if (!rtlpriv->cfg->ops->is_tx_desc_closed(hw, prio, ring->idx)) return; ring->idx = (ring->idx + 1) % ring->entries; @@ -641,10 +648,9 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) ieee80211_tx_status_irqsafe(hw, skb); - if ((ring->entries - skb_queue_len(&ring->queue)) - == 2) { + if ((ring->entries - skb_queue_len(&ring->queue)) <= 4) { - RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, + RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "more desc left, wake skb_queue@%d, ring->idx = %d, skb_queue_len = 0x%x\n", prio, ring->idx, skb_queue_len(&ring->queue)); @@ -793,7 +799,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) rx_remained_cnt = rtlpriv->cfg->ops->rx_desc_buff_remained_cnt(hw, hw_queue); - if (rx_remained_cnt < 1) + if (rx_remained_cnt == 0) return; } else { /* rx descriptor */ @@ -845,18 +851,18 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) else skb_reserve(skb, stats.rx_drvinfo_size + stats.rx_bufshift); - } else { RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "skb->end - skb->tail = %d, len is %d\n", skb->end - skb->tail, len); - break; + dev_kfree_skb_any(skb); + goto new_trx_end; } /* handle command packet here */ if (rtlpriv->cfg->ops->rx_command_packet && rtlpriv->cfg->ops->rx_command_packet(hw, stats, skb)) { dev_kfree_skb_any(skb); - goto end; + goto new_trx_end; } /* @@ -906,6 +912,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) } else { dev_kfree_skb_any(skb); } +new_trx_end: if (rtlpriv->use_new_trx_flow) { rtlpci->rx_ring[hw_queue].next_rx_rp += 1; rtlpci->rx_ring[hw_queue].next_rx_rp %= @@ -921,7 +928,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) rtlpriv->enter_ps = false; schedule_work(&rtlpriv->works.lps_change_work); } -end: skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { @@ -1695,6 +1701,15 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, } } + if (rtlpriv->cfg->ops->get_available_desc && + rtlpriv->cfg->ops->get_available_desc(hw, hw_queue) == 0) { + RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, + "get_available_desc fail\n"); + spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, + flags); + return skb->len; + } + if (ieee80211_is_data_qos(fc)) { tid = rtl_get_tid(skb); if (sta) { diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ee/sw.c index 9b5a7d5be121..c31c6bfb536d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/sw.c @@ -113,8 +113,6 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) RCR_HTC_LOC_CTRL | RCR_AMF | RCR_ACF | - RCR_ADF | - RCR_AICV | RCR_ACRC32 | RCR_AB | RCR_AM | @@ -241,6 +239,7 @@ static struct rtl_hal_ops rtl8192ee_hal_ops = { .set_desc = rtl92ee_set_desc, .get_desc = rtl92ee_get_desc, .is_tx_desc_closed = rtl92ee_is_tx_desc_closed, + .get_available_desc = rtl92ee_get_available_desc, .tx_polling = rtl92ee_tx_polling, .enable_hw_sec = rtl92ee_enable_hw_security_config, .set_key = rtl92ee_set_key, diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ee/trx.c index 00690040be37..1f6d160877e1 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/trx.c @@ -707,7 +707,7 @@ static u16 get_desc_addr_fr_q_idx(u16 queue_index) return desc_address; } -void rtl92ee_get_available_desc(struct ieee80211_hw *hw, u8 q_idx) +u16 rtl92ee_get_available_desc(struct ieee80211_hw *hw, u8 q_idx) { struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -721,11 +721,12 @@ void rtl92ee_get_available_desc(struct ieee80211_hw *hw, u8 q_idx) current_tx_write_point = (u16)((tmp_4byte) & 0x0fff); point_diff = ((current_tx_read_point > current_tx_write_point) ? - (current_tx_read_point - current_tx_write_point) : - (TX_DESC_NUM_92E - current_tx_write_point + + (current_tx_read_point - current_tx_write_point - 1) : + (TX_DESC_NUM_92E - 1 - current_tx_write_point + current_tx_read_point)); rtlpci->tx_ring[q_idx].avl_desc = point_diff; + return point_diff; } void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/trx.h b/drivers/net/wireless/rtlwifi/rtl8192ee/trx.h index 8effef9b13dd..b489dd9c8401 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/trx.h +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/trx.h @@ -831,7 +831,7 @@ void rtl92ee_rx_check_dma_ok(struct ieee80211_hw *hw, u8 *header_desc, u8 queue_index); u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index); -void rtl92ee_get_available_desc(struct ieee80211_hw *hw, u8 queue_index); +u16 rtl92ee_get_available_desc(struct ieee80211_hw *hw, u8 queue_index); void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw, u8 *tx_bd_desc, u8 *desc, u8 queue_index, struct sk_buff *skb, dma_addr_t addr); diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index 6866dcf24340..27822fe34d9a 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2161,6 +2161,7 @@ struct rtl_hal_ops { void (*add_wowlan_pattern)(struct ieee80211_hw *hw, struct rtl_wow_pattern *rtl_pattern, u8 index); + u16 (*get_available_desc)(struct ieee80211_hw *hw, u8 q_idx); }; struct rtl_intf_ops { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index d8c10764f130..76ce69cc1382 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1062,8 +1062,7 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? - XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1333,8 +1332,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netdev->ethtool_ops = &xennet_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); - np->netdev = netdev; netif_carrier_off(netdev); diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index d02df7d0c629..57b7bc2e037a 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -455,7 +455,6 @@ static int pci171x_insn_read_ai(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data) { struct pci1710_private *devpriv = dev->private; - unsigned int chan = CR_CHAN(insn->chanspec); int ret = 0; int i; @@ -477,7 +476,7 @@ static int pci171x_insn_read_ai(struct comedi_device *dev, break; val = inw(dev->iobase + PCI171x_AD_DATA); - ret = pci171x_ai_dropout(dev, s, chan, val); + ret = pci171x_ai_dropout(dev, s, 0, val); if (ret) break; diff --git a/fs/exec.c b/fs/exec.c index ad8798e26be9..4617a4ec52e3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1259,6 +1259,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm) spin_unlock(&p->fs->lock); } +static void bprm_fill_uid(struct linux_binprm *bprm) +{ + struct inode *inode; + unsigned int mode; + kuid_t uid; + kgid_t gid; + + /* clear any previous set[ug]id data from a previous binary */ + bprm->cred->euid = current_euid(); + bprm->cred->egid = current_egid(); + + if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + return; + + if (task_no_new_privs(current)) + return; + + inode = file_inode(bprm->file); + mode = READ_ONCE(inode->i_mode); + if (!(mode & (S_ISUID|S_ISGID))) + return; + + /* Be careful if suid/sgid is set */ + mutex_lock(&inode->i_mutex); + + /* reload atomically mode/uid/gid now that lock held */ + mode = inode->i_mode; + uid = inode->i_uid; + gid = inode->i_gid; + mutex_unlock(&inode->i_mutex); + + /* We ignore suid/sgid if there are no mappings for them in the ns */ + if (!kuid_has_mapping(bprm->cred->user_ns, uid) || + !kgid_has_mapping(bprm->cred->user_ns, gid)) + return; + + if (mode & S_ISUID) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->euid = uid; + } + + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->egid = gid; + } +} + /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes @@ -1267,36 +1314,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - struct inode *inode = file_inode(bprm->file); - umode_t mode = inode->i_mode; int retval; - - /* clear any previous set[ug]id data from a previous binary */ - bprm->cred->euid = current_euid(); - bprm->cred->egid = current_egid(); - - if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !task_no_new_privs(current) && - kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && - kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { - /* Set-uid? */ - if (mode & S_ISUID) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->euid = inode->i_uid; - } - - /* Set-gid? */ - /* - * If setgid is set but no group execute bit then this - * is a candidate for mandatory locking, not a setgid - * executable. - */ - if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->egid = inode->i_gid; - } - } + bprm_fill_uid(bprm); /* fill in binprm security blob */ retval = security_bprm_set_creds(bprm); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 431b7fc605c9..e235ec5f1f28 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write); + pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write); + pud_t *pud, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -133,8 +133,8 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) static inline void hugetlb_show_meminfo(void) { } -#define follow_huge_pmd(mm, addr, pmd, write) NULL -#define follow_huge_pud(mm, addr, pud, write) NULL +#define follow_huge_pmd(mm, addr, pmd, flags) NULL +#define follow_huge_pud(mm, addr, pud, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 52fd8e8694cf..840fb7f7c3de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2159,6 +2159,12 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +DECLARE_PER_CPU(int, xmit_recursion); +static inline int dev_recursion_level(void) +{ + return this_cpu_read(xmit_recursion); +} + struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6adfb7bfbf44..e288d5c016a7 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -137,6 +137,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry)); } +extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, @@ -150,6 +152,8 @@ static inline int is_migration_entry(swp_entry_t swp) } #define migration_entry_to_page(swp) NULL static inline void make_migration_entry_read(swp_entry_t *entryp) { } +static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..6e0ce8c7b8cb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -227,9 +227,23 @@ struct skb_data { /* skb->cb is one of these */ struct urb *urb; struct usbnet *dev; enum skb_state state; - size_t length; + long length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, + unsigned long packets, long bytes_delta) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; + entry->length = bytes_delta; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, diff --git a/include/net/ip.h b/include/net/ip.h index 09cf5aebb283..c0c26c3deeb5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -static inline int sk_mc_loop(struct sock *sk) -{ - if (!sk) - return 1; - switch (sk->sk_family) { - case AF_INET: - return inet_sk(sk)->mc_loop; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - return inet6_sk(sk)->mc_loop; -#endif - } - WARN_ON(1); - return 1; -} - bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1d09b46c1e48..eda131d179d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); diff --git a/include/net/sock.h b/include/net/sock.h index 2210fec65669..45b54d3fcb04 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1812,6 +1812,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +bool sk_mc_loop(struct sock *sk); + static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a28e09c7825d..36508e69e92a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1380,7 +1380,8 @@ peek_stack: /* tell verifier to check for equivalent states * after every call and jump */ - env->explored_states[t + 1] = STATE_LIST_MARK; + if (t + 1 < insn_cnt) + env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ ret = push_insn(t, t + 1, FALLTHROUGH, env); diff --git a/mm/gup.c b/mm/gup.c index 9b2afbfe67e3..e29c3745a893 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -167,10 +167,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { - if (flags & FOLL_GET) - return NULL; - page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); - return page; + page = follow_huge_pud(mm, address, pud, flags); + if (page) + return page; + return no_page_table(vma, flags); } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); @@ -179,19 +179,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pmd_none(*pmd)) return no_page_table(vma, flags); if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { - page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); - if (flags & FOLL_GET) { - /* - * Refcount on tail pages are not well-defined and - * shouldn't be taken. The caller should handle a NULL - * return when trying to follow tail pages. - */ - if (PageHead(page)) - get_page(page); - else - page = NULL; - } - return page; + page = follow_huge_pmd(mm, address, pmd, flags); + if (page) + return page; + return no_page_table(vma, flags); } if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) return no_page_table(vma, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 267e41971100..a2bfd02e289f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3700,44 +3700,64 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return (pte_t *) pmd; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ - if (!pmd_present(*pmd)) - return NULL; - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; +/* + * These functions are overwritable if your architecture needs its own + * behavior. + */ +struct page * __weak +follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); } -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) +struct page * __weak +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int flags) { - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + struct page *page = NULL; + spinlock_t *ptl; +retry: + ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); + /* + * make sure that the address range covered by this pmd is not + * unmapped from other threads. + */ + if (!pmd_huge(*pmd)) + goto out; + if (pmd_present(*pmd)) { + page = pte_page(*(pte_t *)pmd) + + ((address & ~PMD_MASK) >> PAGE_SHIFT); + if (flags & FOLL_GET) + get_page(page); + } else { + if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { + spin_unlock(ptl); + __migration_entry_wait(mm, (pte_t *)pmd, ptl); + goto retry; + } + /* + * hwpoisoned entry is treated as no_page_table in + * follow_page_mask(). + */ + } +out: + spin_unlock(ptl); return page; } -#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ - -/* Can be overriden by architectures */ struct page * __weak follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) + pud_t *pud, int flags) { - BUG(); - return NULL; -} + if (flags & FOLL_GET) + return NULL; -#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); +} #ifdef CONFIG_MEMORY_FAILURE diff --git a/mm/migrate.c b/mm/migrate.c index 344cdf692fc8..be6d1edcfcb7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -229,7 +229,7 @@ static void remove_migration_ptes(struct page *old, struct page *new) * get to the page and wait until migration is finished. * When we return from this function the fault will be retried. */ -static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, +void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { pte_t pte; @@ -1268,7 +1268,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm, goto put_and_set; if (PageHuge(page)) { - isolate_huge_page(page, &pagelist); + if (PageHead(page)) + isolate_huge_page(page, &pagelist); goto put_and_set; } diff --git a/net/core/dev.c b/net/core/dev.c index 4ff46f8054d4..5dd905ca2654 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2821,7 +2821,9 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -static DEFINE_PER_CPU(int, xmit_recursion); +DEFINE_PER_CPU(int, xmit_recursion); +EXPORT_SYMBOL(xmit_recursion); + #define RECURSION_LIMIT 10 /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 62c67bebcaf5..39c444c1206d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4141,18 +4141,20 @@ EXPORT_SYMBOL(skb_try_coalesce); */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { - if (xnet) - skb_orphan(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - skb->mark = 0; - skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); + + if (!xnet) + return; + + skb_orphan(skb); + skb->mark = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/core/sock.c b/net/core/sock.c index 1c7a33db1314..a91f99f26420 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -651,6 +651,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) sock_reset_flag(sk, bit); } +bool sk_mc_loop(struct sock *sk) +{ + if (dev_recursion_level()) + return false; + if (!sk) + return true; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + WARN_ON(1); + return true; +} +EXPORT_SYMBOL(sk_mc_loop); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 394a200f93c1..69711d81a88b 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -121,10 +121,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); @@ -139,6 +135,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, if (unlikely(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + if (IS_ERR(skb)) + return PTR_ERR(skb); + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..08ccca6a8035 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3104,10 +3104,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d22f54482bab..982347eee104 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1516,7 +1516,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9790f396ce5e..9f29453049dc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2931,6 +2931,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3f5aa9959076..0bf56e562f76 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -541,7 +541,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..d375ce60463e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1216,7 +1216,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + /* Only set hop_limit on the interface if it is higher than + * the current hop_limit. + */ + if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + } else { + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + } if (rt) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c0b54e87b47..b89979312fbb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1409,6 +1409,15 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; @@ -1541,6 +1550,7 @@ do_time_wait: inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); sk = sk2; + tcp_v6_restore_cb(skb); goto process; } /* Fall through to ACK */ @@ -1549,6 +1559,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: + tcp_v6_restore_cb(skb); goto no_tcp_socket; case TCP_TW_SUCCESS: ; @@ -1583,7 +1594,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 2034c6d9cb5a..296cc246f0a3 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport) ASSERT_OVSL(); hlist_del_rcu(&vport->hash_node); - - vport->ops->destroy(vport); - module_put(vport->ops->owner); + vport->ops->destroy(vport); } /**