diff --git a/Makefile b/Makefile index 78bd926c8439..3cd40f1a8f75 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 2 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Bobtail Squid diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index b9a37057b77a..cee24c308337 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -8,27 +8,19 @@ config SH_ALPHA_BOARD bool config SH_DEVICE_TREE - bool "Board Described by Device Tree" + bool select OF select OF_EARLY_FLATTREE select TIMER_OF select COMMON_CLK select GENERIC_CALIBRATE_DELAY - help - Select Board Described by Device Tree to build a kernel that - does not hard-code any board-specific knowledge but instead uses - a device tree blob provided by the boot-loader. You must enable - drivers for any hardware you want to use separately. At this - time, only boards based on the open-hardware J-Core processors - have sufficient driver coverage to use this option; do not - select it if you are using original SuperH hardware. config SH_JCORE_SOC bool "J-Core SoC" - depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) + select SH_DEVICE_TREE select CLKSRC_JCORE_PIT select JCORE_AIC - default y if CPU_J2 + depends on CPU_J2 help Select this option to include drivers core components of the J-Core SoC, including interrupt controllers and timers. diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index a55be205b91a..dbfe34664633 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -98,6 +98,9 @@ static int ath_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + ath = kzalloc(sizeof(*ath), GFP_KERNEL); if (!ath) return -ENOMEM; diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 8905ad2edde7..ae2624fce913 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -406,6 +406,9 @@ static int bcm_open(struct hci_uart *hu) bt_dev_dbg(hu->hdev, "hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); if (!bcm) return -ENOMEM; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 207bae5e0d46..31f25153087d 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -391,6 +391,9 @@ static int intel_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + intel = kzalloc(sizeof(*intel), GFP_KERNEL); if (!intel) return -ENOMEM; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index c84f985f348d..c953f14656b5 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -284,6 +284,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +/* Check the underlying device or tty has flow control support */ +bool hci_uart_has_flow_control(struct hci_uart *hu) +{ + /* serdev nodes check if the needed operations are present */ + if (hu->serdev) + return true; + + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) + return true; + + return false; +} + /* Flow control or un-flow control the device */ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) { diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index 50212ac629e3..49dcf198ffd8 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -52,6 +52,9 @@ static int mrvl_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); if (!mrvl) return -ENOMEM; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9d273cdde563..f41fb2c02e4f 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -458,6 +458,9 @@ static int qca_open(struct hci_uart *hu) BT_DBG("hu %p qca_open", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL); if (!qca) return -ENOMEM; diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index d8cf005e3c5d..22c278b13ab9 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -103,6 +103,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu); int hci_uart_init_ready(struct hci_uart *hu); void hci_uart_init_work(struct work_struct *work); void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed); +bool hci_uart_has_flow_control(struct hci_uart *hu); void hci_uart_set_flow_control(struct hci_uart *hu, bool enable); void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, unsigned int oper_speed); diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 4c99739b937e..0e224232f746 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1955,6 +1955,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) /* get endpoint base */ idx = ((ep_addr & 0x7f) - 1) * 2; + if (idx > 15) + return -EIO; + if (ep_addr & 0x80) idx++; attr = ep->desc.bmAttributes; diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c index 5e782b3c2fa9..bf1ee654df80 100644 --- a/drivers/media/radio/radio-raremono.c +++ b/drivers/media/radio/radio-raremono.c @@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv, return 0; } +static void raremono_device_release(struct v4l2_device *v4l2_dev) +{ + struct raremono_device *radio = to_raremono_dev(v4l2_dev); + + kfree(radio->buffer); + kfree(radio); +} + /* File system interface */ static const struct v4l2_file_operations usb_raremono_fops = { .owner = THIS_MODULE, @@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf, struct raremono_device *radio; int retval = 0; - radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL); - if (radio) - radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL); - - if (!radio || !radio->buffer) + radio = kzalloc(sizeof(*radio), GFP_KERNEL); + if (!radio) + return -ENOMEM; + radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); + if (!radio->buffer) { + kfree(radio); return -ENOMEM; + } radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; @@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf, if (retval != 3 || (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) { dev_info(&intf->dev, "this is not Thanko's Raremono.\n"); - return -ENODEV; + retval = -ENODEV; + goto free_mem; } dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n", @@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf, retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); - return retval; + goto free_mem; } mutex_init(&radio->lock); @@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf, radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops; radio->vdev.lock = &radio->lock; radio->vdev.release = video_device_release_empty; + radio->v4l2_dev.release = raremono_device_release; usb_set_intfdata(intf, &radio->v4l2_dev); @@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf, } dev_err(&intf->dev, "could not register video device\n"); v4l2_device_unregister(&radio->v4l2_dev); + +free_mem: + kfree(radio->buffer); + kfree(radio); return retval; } diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c index f746f6e2f686..a8a72d5fbd12 100644 --- a/drivers/media/usb/au0828/au0828-core.c +++ b/drivers/media/usb/au0828/au0828-core.c @@ -719,6 +719,12 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Setup */ au0828_card_setup(dev); + /* + * Store the pointer to the au0828_dev so it can be accessed in + * au0828_usb_disconnect + */ + usb_set_intfdata(interface, dev); + /* Analog TV */ retval = au0828_analog_register(dev, interface); if (retval) { @@ -737,12 +743,6 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Remote controller */ au0828_rc_register(dev); - /* - * Store the pointer to the au0828_dev so it can be accessed in - * au0828_usb_disconnect - */ - usb_set_intfdata(interface, dev); - pr_info("Registered device AU0828 [%s]\n", dev->board.name == NULL ? "Unset" : dev->board.name); diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c index b2268981c963..17468f7d78ed 100644 --- a/drivers/media/usb/cpia2/cpia2_usb.c +++ b/drivers/media/usb/cpia2/cpia2_usb.c @@ -893,7 +893,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) cpia2_unregister_camera(cam); v4l2_device_disconnect(&cam->v4l2_dev); mutex_unlock(&cam->v4l2_lock); - v4l2_device_put(&cam->v4l2_dev); if(cam->buffers) { DBG("Wakeup waiting processes\n"); @@ -902,6 +901,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf) wake_up_interruptible(&cam->wq_stream); } + v4l2_device_put(&cam->v4l2_dev); + LOG("CPiA2 camera disconnected.\n"); } diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 70b5cb08d65b..bbf361ce0bd0 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -1670,7 +1670,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl) } if (!hdw->flag_decoder_missed) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, - "WARNING: No decoder present"); + "***WARNING*** No decoder present"); hdw->flag_decoder_missed = !0; trace_stbit("flag_decoder_missed", hdw->flag_decoder_missed); @@ -2356,7 +2356,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, if (hdw_desc->flag_is_experimental) { pvr2_trace(PVR2_TRACE_INFO, "**********"); pvr2_trace(PVR2_TRACE_INFO, - "WARNING: Support for this device (%s) is experimental.", + "***WARNING*** Support for this device (%s) is experimental.", hdw_desc->description); pvr2_trace(PVR2_TRACE_INFO, "Important functionality might not be entirely working."); diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c index 68e323f8d9cf..275394bafe7d 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c @@ -333,11 +333,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw, if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, - "WARNING: Detected a wedged cx25840 chip; the device will not work."); + "***WARNING*** Detected a wedged cx25840 chip; the device will not work."); pvr2_trace(PVR2_TRACE_ERROR_LEGS, - "WARNING: Try power cycling the pvrusb2 device."); + "***WARNING*** Try power cycling the pvrusb2 device."); pvr2_trace(PVR2_TRACE_ERROR_LEGS, - "WARNING: Disabling further access to the device to prevent other foul-ups."); + "***WARNING*** Disabling further access to the device to prevent other foul-ups."); // This blocks all further communication with the part. hdw->i2c_func[0x44] = NULL; pvr2_hdw_render_useless(hdw); diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c index 447279b4a545..e7ab41401577 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-std.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c @@ -343,7 +343,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr, bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk); pvr2_trace( PVR2_TRACE_ERROR_LEGS, - "WARNING: Failed to classify the following standard(s): %.*s", + "***WARNING*** Failed to classify the following standard(s): %.*s", bcnt,buf); } diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 970cf69ac35f..a3ecf7d77949 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -1016,7 +1016,7 @@ static int ath10k_usb_probe(struct usb_interface *interface, } /* TODO: remove this once USB support is fully implemented */ - ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n"); + ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n"); return 0; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 499acf07d61a..e942b3e84068 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -12,11 +12,6 @@ module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); -inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) -{ - return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3)); -} - /* * If multipathing is enabled we need to always use the subsystem instance * number for numbering our devices to avoid conflicts between subsystems that @@ -614,7 +609,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { int error; - if (!nvme_ctrl_use_ana(ctrl)) + /* check if multipath is enabled and we have the capability */ + if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) return 0; ctrl->anacap = id->anacap; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 55553d293a98..7391cd0a7739 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -472,7 +472,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH -bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl); +static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) +{ + return ctrl->ana_log_buf != NULL; +} + void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); void nvme_failover_req(struct request *req); diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 3a546ec10d90..22a65ad4e46e 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -152,6 +152,14 @@ static long pps_cdev_ioctl(struct file *file, pps->params.mode |= PPS_CANWAIT; pps->params.api_version = PPS_API_VERS; + /* + * Clear unused fields of pps_kparams to avoid leaking + * uninitialized data of the PPS_SETPARAMS caller via + * PPS_GETPARAMS + */ + pps->params.assert_off_tu.flags = 0; + pps->params.clear_off_tu.flags = 0; + spin_unlock_irq(&pps->lock); break; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0176241eaea7..7754d7679122 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1263,20 +1263,22 @@ static int send_cap_msg(struct cap_msg_args *arg) } /* - * Queue cap releases when an inode is dropped from our cache. Since - * inode is about to be destroyed, there is no need for i_ceph_lock. + * Queue cap releases when an inode is dropped from our cache. */ -void __ceph_remove_caps(struct inode *inode) +void __ceph_remove_caps(struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = ceph_inode(inode); struct rb_node *p; + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) + * may call __ceph_caps_issued_mask() on a freeing inode. */ + spin_lock(&ci->i_ceph_lock); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); __ceph_remove_cap(cap, true); } + spin_unlock(&ci->i_ceph_lock); } /* diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5b7d4881a4f8..3c7a32779574 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -536,7 +536,7 @@ void ceph_evict_inode(struct inode *inode) ceph_fscache_unregister_inode_cookie(ci); - __ceph_remove_caps(inode); + __ceph_remove_caps(ci); if (__ceph_has_any_quota(ci)) ceph_adjust_quota_realms_count(inode, false); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 048409fba1a8..edec39aa5ce2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1000,7 +1000,7 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void __ceph_remove_caps(struct inode* inode); +extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); extern int ceph_is_any_caps(struct inode *inode); diff --git a/fs/exec.c b/fs/exec.c index 89a500bb897a..39902cc9eb6f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1828,7 +1828,7 @@ static int __do_execve_file(int fd, struct filename *filename, membarrier_execve(current); rseq_execve(current); acct_update_integrals(current); - task_numa_free(current); + task_numa_free(current, false); free_bprm(bprm); kfree(pathbuf); if (filename) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d7e4f0848e28..4d90f5bf0b0a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -406,10 +406,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) clp = nfs_match_client(cl_init); if (clp) { spin_unlock(&nn->nfs_client_lock); - if (IS_ERR(clp)) - return clp; if (new) new->rpc_ops->free_client(new); + if (IS_ERR(clp)) + return clp; return nfs_found_client(cl_init, clp); } if (new) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 03517154fe0f..e781af70d10d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } +/* + * If the user used setproctitle(), we just get the string from + * user space at arg_start, and limit it to a maximum of one page. + */ +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, + size_t count, unsigned long pos, + unsigned long arg_start) +{ + char *page; + int ret, got; + + if (pos >= PAGE_SIZE) + return 0; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + ret = 0; + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); + if (got > 0) { + int len = strnlen(page, got); + + /* Include the NUL character if it was found */ + if (len < got) + len++; + + if (len > pos) { + len -= pos; + if (len > count) + len = count; + len -= copy_to_user(buf, page+pos, len); + if (!len) + len = -EFAULT; + ret = len; + } + } + free_page((unsigned long)page); + return ret; +} + static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; - char *page; + char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) @@ -231,28 +272,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, return 0; /* - * We have traditionally allowed the user to re-write - * the argument strings and overflow the end result - * into the environment section. But only do that if - * the environment area is contiguous to the arguments. + * We allow setproctitle() to overwrite the argument + * strings, and overflow past the original end. But + * only when it overflows into the environment area. */ - if (env_start != arg_end || env_start >= env_end) + if (env_start != arg_end || env_end < env_start) env_start = env_end = arg_end; - - /* .. and limit it to a maximum of one page of slop */ - if (env_end >= arg_end + PAGE_SIZE) - env_end = arg_end + PAGE_SIZE - 1; + len = env_end - arg_start; /* We're not going to care if "*ppos" has high bits set */ - pos = arg_start + *ppos; - - /* .. but we do check the result is in the proper range */ - if (pos < arg_start || pos >= env_end) + pos = *ppos; + if (pos >= len) return 0; + if (count > len - pos) + count = len - pos; + if (!count) + return 0; + + /* + * Magical special case: if the argv[] end byte is not + * zero, the user has overwritten it with setproctitle(3). + * + * Possible future enhancement: do this only once when + * pos is 0, and set a flag in the 'struct file'. + */ + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) + return get_mm_proctitle(mm, buf, count, pos, arg_start); - /* .. and we never go past env_end */ - if (env_end - pos < count) - count = env_end - pos; + /* + * For the non-setproctitle() case we limit things strictly + * to the [arg_start, arg_end[ range. + */ + pos += arg_start; + if (pos < arg_start || pos >= arg_end) + return 0; + if (count > arg_end - pos) + count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) @@ -262,48 +317,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); - long offset; - /* - * Are we already starting past the official end? - * We always include the last byte that is *supposed* - * to be NUL - */ - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; - - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); - if (got <= offset) + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); + if (got <= 0) break; - got -= offset; - - /* Don't walk past a NUL character once you hit arg_end */ - if (pos + got >= arg_end) { - int n = 0; - - /* - * If we started before 'arg_end' but ended up - * at or after it, we start the NUL character - * check at arg_end-1 (where we expect the normal - * EOF to be). - * - * NOTE! This is smaller than 'got', because - * pos + got >= arg_end - */ - if (pos < arg_end) - n = arg_end - pos - 1; - - /* Cut off at first NUL after 'n' */ - got = n + strnlen(page+n, offset+got-n); - if (got < offset) - break; - got -= offset; - - /* Include the NUL if it existed */ - if (got < size) - got++; - } - - got -= copy_to_user(buf, page+offset, got); + got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..1157f6e245af 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1026,7 +1026,15 @@ struct task_struct { u64 last_sum_exec_runtime; struct callback_head numa_work; - struct numa_group *numa_group; + /* + * This pointer is only modified for current in syscall and + * pagefault context (and for tasks being destroyed), so it can be read + * from any of the following contexts: + * - RCU read-side critical section + * - current->numa_group from everywhere + * - task's runqueue locked, task not running + */ + struct numa_group __rcu *numa_group; /* * numa_faults is an array split into four regions: diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index e7dd04a84ba8..3988762efe15 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -19,7 +19,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); +extern void task_numa_free(struct task_struct *p, bool final); extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, int src_nid, int dst_cpu); #else @@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p) static inline void set_numabalancing_state(bool enabled) { } -static inline void task_numa_free(struct task_struct *p) +static inline void task_numa_free(struct task_struct *p, bool final) { } static inline bool should_numa_migrate_memory(struct task_struct *p, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cad09858a5f2..546ebee39e2a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1928,8 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env, /* Check array->index_type */ index_type_id = array->index_type; index_type = btf_type_by_id(btf, index_type_id); - if (btf_type_is_resolve_source_only(index_type) || - btf_type_nosize_or_null(index_type)) { + if (btf_type_nosize_or_null(index_type) || + btf_type_is_resolve_source_only(index_type)) { btf_verifier_log_type(env, v->t, "Invalid index"); return -EINVAL; } @@ -1948,8 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env, /* Check array->type */ elem_type_id = array->type; elem_type = btf_type_by_id(btf, elem_type_id); - if (btf_type_is_resolve_source_only(elem_type) || - btf_type_nosize_or_null(elem_type)) { + if (btf_type_nosize_or_null(elem_type) || + btf_type_is_resolve_source_only(elem_type)) { btf_verifier_log_type(env, v->t, "Invalid elem"); return -EINVAL; @@ -2170,8 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env, const struct btf_type *member_type = btf_type_by_id(env->btf, member_type_id); - if (btf_type_is_resolve_source_only(member_type) || - btf_type_nosize_or_null(member_type)) { + if (btf_type_nosize_or_null(member_type) || + btf_type_is_resolve_source_only(member_type)) { btf_verifier_log_member(env, v->t, member, "Invalid member"); return -EINVAL; diff --git a/kernel/fork.c b/kernel/fork.c index fe83343da24b..d3f006ed2f9d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -727,7 +727,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); cgroup_free(tsk); - task_numa_free(tsk); + task_numa_free(tsk, true); security_task_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f35930f5e528..9ecf1e4c624b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1067,6 +1067,21 @@ struct numa_group { unsigned long faults[0]; }; +/* + * For functions that can be called in multiple contexts that permit reading + * ->numa_group (see struct task_struct for locking rules). + */ +static struct numa_group *deref_task_numa_group(struct task_struct *p) +{ + return rcu_dereference_check(p->numa_group, p == current || + (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu))); +} + +static struct numa_group *deref_curr_numa_group(struct task_struct *p) +{ + return rcu_dereference_protected(p->numa_group, p == current); +} + static inline unsigned long group_faults_priv(struct numa_group *ng); static inline unsigned long group_faults_shared(struct numa_group *ng); @@ -1110,10 +1125,12 @@ static unsigned int task_scan_start(struct task_struct *p) { unsigned long smin = task_scan_min(p); unsigned long period = smin; + struct numa_group *ng; /* Scale the maximum scan period with the amount of shared memory. */ - if (p->numa_group) { - struct numa_group *ng = p->numa_group; + rcu_read_lock(); + ng = rcu_dereference(p->numa_group); + if (ng) { unsigned long shared = group_faults_shared(ng); unsigned long private = group_faults_priv(ng); @@ -1121,6 +1138,7 @@ static unsigned int task_scan_start(struct task_struct *p) period *= shared + 1; period /= private + shared + 1; } + rcu_read_unlock(); return max(smin, period); } @@ -1129,13 +1147,14 @@ static unsigned int task_scan_max(struct task_struct *p) { unsigned long smin = task_scan_min(p); unsigned long smax; + struct numa_group *ng; /* Watch for min being lower than max due to floor calculations */ smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p); /* Scale the maximum scan period with the amount of shared memory. */ - if (p->numa_group) { - struct numa_group *ng = p->numa_group; + ng = deref_curr_numa_group(p); + if (ng) { unsigned long shared = group_faults_shared(ng); unsigned long private = group_faults_priv(ng); unsigned long period = smax; @@ -1167,7 +1186,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) p->numa_scan_period = sysctl_numa_balancing_scan_delay; p->numa_work.next = &p->numa_work; p->numa_faults = NULL; - p->numa_group = NULL; + RCU_INIT_POINTER(p->numa_group, NULL); p->last_task_numa_placement = 0; p->last_sum_exec_runtime = 0; @@ -1214,7 +1233,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p) pid_t task_numa_group_id(struct task_struct *p) { - return p->numa_group ? p->numa_group->gid : 0; + struct numa_group *ng; + pid_t gid = 0; + + rcu_read_lock(); + ng = rcu_dereference(p->numa_group); + if (ng) + gid = ng->gid; + rcu_read_unlock(); + + return gid; } /* @@ -1239,11 +1267,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid) static inline unsigned long group_faults(struct task_struct *p, int nid) { - if (!p->numa_group) + struct numa_group *ng = deref_task_numa_group(p); + + if (!ng) return 0; - return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + - p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; + return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] + + ng->faults[task_faults_idx(NUMA_MEM, nid, 1)]; } static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) @@ -1381,12 +1411,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid, static inline unsigned long group_weight(struct task_struct *p, int nid, int dist) { + struct numa_group *ng = deref_task_numa_group(p); unsigned long faults, total_faults; - if (!p->numa_group) + if (!ng) return 0; - total_faults = p->numa_group->total_faults; + total_faults = ng->total_faults; if (!total_faults) return 0; @@ -1400,7 +1431,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid, bool should_numa_migrate_memory(struct task_struct *p, struct page * page, int src_nid, int dst_cpu) { - struct numa_group *ng = p->numa_group; + struct numa_group *ng = deref_curr_numa_group(p); int dst_nid = cpu_to_node(dst_cpu); int last_cpupid, this_cpupid; @@ -1583,13 +1614,14 @@ static bool load_too_imbalanced(long src_load, long dst_load, static void task_numa_compare(struct task_numa_env *env, long taskimp, long groupimp, bool maymove) { + struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p); struct rq *dst_rq = cpu_rq(env->dst_cpu); + long imp = p_ng ? groupimp : taskimp; struct task_struct *cur; long src_load, dst_load; - long load; - long imp = env->p->numa_group ? groupimp : taskimp; - long moveimp = imp; int dist = env->dist; + long moveimp = imp; + long load; if (READ_ONCE(dst_rq->numa_migrate_on)) return; @@ -1628,21 +1660,22 @@ static void task_numa_compare(struct task_numa_env *env, * If dst and source tasks are in the same NUMA group, or not * in any group then look only at task weights. */ - if (cur->numa_group == env->p->numa_group) { + cur_ng = rcu_dereference(cur->numa_group); + if (cur_ng == p_ng) { imp = taskimp + task_weight(cur, env->src_nid, dist) - task_weight(cur, env->dst_nid, dist); /* * Add some hysteresis to prevent swapping the * tasks within a group over tiny differences. */ - if (cur->numa_group) + if (cur_ng) imp -= imp / 16; } else { /* * Compare the group weights. If a task is all by itself * (not part of a group), use the task weight instead. */ - if (cur->numa_group && env->p->numa_group) + if (cur_ng && p_ng) imp += group_weight(cur, env->src_nid, dist) - group_weight(cur, env->dst_nid, dist); else @@ -1740,11 +1773,12 @@ static int task_numa_migrate(struct task_struct *p) .best_imp = 0, .best_cpu = -1, }; + unsigned long taskweight, groupweight; struct sched_domain *sd; + long taskimp, groupimp; + struct numa_group *ng; struct rq *best_rq; - unsigned long taskweight, groupweight; int nid, ret, dist; - long taskimp, groupimp; /* * Pick the lowest SD_NUMA domain, as that would have the smallest @@ -1790,7 +1824,8 @@ static int task_numa_migrate(struct task_struct *p) * multiple NUMA nodes; in order to better consolidate the group, * we need to check other locations. */ - if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) { + ng = deref_curr_numa_group(p); + if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) { for_each_online_node(nid) { if (nid == env.src_nid || nid == p->numa_preferred_nid) continue; @@ -1823,7 +1858,7 @@ static int task_numa_migrate(struct task_struct *p) * A task that migrated to a second choice node will be better off * trying for a better one later. Do not set the preferred node here. */ - if (p->numa_group) { + if (ng) { if (env.best_cpu == -1) nid = env.src_nid; else @@ -2118,6 +2153,7 @@ static void task_numa_placement(struct task_struct *p) unsigned long total_faults; u64 runtime, period; spinlock_t *group_lock = NULL; + struct numa_group *ng; /* * The p->mm->numa_scan_seq field gets updated without @@ -2135,8 +2171,9 @@ static void task_numa_placement(struct task_struct *p) runtime = numa_get_avg_runtime(p, &period); /* If the task is part of a group prevent parallel updates to group stats */ - if (p->numa_group) { - group_lock = &p->numa_group->lock; + ng = deref_curr_numa_group(p); + if (ng) { + group_lock = &ng->lock; spin_lock_irq(group_lock); } @@ -2177,7 +2214,7 @@ static void task_numa_placement(struct task_struct *p) p->numa_faults[cpu_idx] += f_diff; faults += p->numa_faults[mem_idx]; p->total_numa_faults += diff; - if (p->numa_group) { + if (ng) { /* * safe because we can only change our own group * @@ -2185,14 +2222,14 @@ static void task_numa_placement(struct task_struct *p) * nid and priv in a specific region because it * is at the beginning of the numa_faults array. */ - p->numa_group->faults[mem_idx] += diff; - p->numa_group->faults_cpu[mem_idx] += f_diff; - p->numa_group->total_faults += diff; - group_faults += p->numa_group->faults[mem_idx]; + ng->faults[mem_idx] += diff; + ng->faults_cpu[mem_idx] += f_diff; + ng->total_faults += diff; + group_faults += ng->faults[mem_idx]; } } - if (!p->numa_group) { + if (!ng) { if (faults > max_faults) { max_faults = faults; max_nid = nid; @@ -2203,8 +2240,8 @@ static void task_numa_placement(struct task_struct *p) } } - if (p->numa_group) { - numa_group_count_active_nodes(p->numa_group); + if (ng) { + numa_group_count_active_nodes(ng); spin_unlock_irq(group_lock); max_nid = preferred_group_nid(p, max_nid); } @@ -2238,7 +2275,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, int cpu = cpupid_to_cpu(cpupid); int i; - if (unlikely(!p->numa_group)) { + if (unlikely(!deref_curr_numa_group(p))) { unsigned int size = sizeof(struct numa_group) + 4*nr_node_ids*sizeof(unsigned long); @@ -2274,7 +2311,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, if (!grp) goto no_join; - my_grp = p->numa_group; + my_grp = deref_curr_numa_group(p); if (grp == my_grp) goto no_join; @@ -2336,13 +2373,24 @@ no_join: return; } -void task_numa_free(struct task_struct *p) +/* + * Get rid of NUMA staticstics associated with a task (either current or dead). + * If @final is set, the task is dead and has reached refcount zero, so we can + * safely free all relevant data structures. Otherwise, there might be + * concurrent reads from places like load balancing and procfs, and we should + * reset the data back to default state without freeing ->numa_faults. + */ +void task_numa_free(struct task_struct *p, bool final) { - struct numa_group *grp = p->numa_group; - void *numa_faults = p->numa_faults; + /* safe: p either is current or is being freed by current */ + struct numa_group *grp = rcu_dereference_raw(p->numa_group); + unsigned long *numa_faults = p->numa_faults; unsigned long flags; int i; + if (!numa_faults) + return; + if (grp) { spin_lock_irqsave(&grp->lock, flags); for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) @@ -2355,8 +2403,14 @@ void task_numa_free(struct task_struct *p) put_numa_group(grp); } - p->numa_faults = NULL; - kfree(numa_faults); + if (final) { + p->numa_faults = NULL; + kfree(numa_faults); + } else { + p->total_numa_faults = 0; + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) + numa_faults[i] = 0; + } } /* @@ -2409,7 +2463,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) * actively using should be counted as local. This allows the * scan rate to slow down when a workload has settled down. */ - ng = p->numa_group; + ng = deref_curr_numa_group(p); if (!priv && !local && ng && ng->active_nodes > 1 && numa_is_active_node(cpu_node, ng) && numa_is_active_node(mem_node, ng)) @@ -10708,18 +10762,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) { int node; unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; + struct numa_group *ng; + rcu_read_lock(); + ng = rcu_dereference(p->numa_group); for_each_online_node(node) { if (p->numa_faults) { tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; } - if (p->numa_group) { - gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], - gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; + if (ng) { + gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)], + gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)]; } print_numa_stats(m, node, tsf, tpf, gsf, gpf); } + rcu_read_unlock(); } #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 169112f8aa1e..ab47bf3ab66e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -274,7 +274,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected); void vsock_remove_bound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); - __vsock_remove_bound(vsk); + if (__vsock_in_bound_table(vsk)) + __vsock_remove_bound(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_bound); @@ -282,7 +283,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound); void vsock_remove_connected(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); - __vsock_remove_connected(vsk); + if (__vsock_in_connected_table(vsk)) + __vsock_remove_connected(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_connected); @@ -318,35 +320,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, } EXPORT_SYMBOL_GPL(vsock_find_connected_socket); -static bool vsock_in_bound_table(struct vsock_sock *vsk) -{ - bool ret; - - spin_lock_bh(&vsock_table_lock); - ret = __vsock_in_bound_table(vsk); - spin_unlock_bh(&vsock_table_lock); - - return ret; -} - -static bool vsock_in_connected_table(struct vsock_sock *vsk) -{ - bool ret; - - spin_lock_bh(&vsock_table_lock); - ret = __vsock_in_connected_table(vsk); - spin_unlock_bh(&vsock_table_lock); - - return ret; -} - void vsock_remove_sock(struct vsock_sock *vsk) { - if (vsock_in_bound_table(vsk)) - vsock_remove_bound(vsk); - - if (vsock_in_connected_table(vsk)) - vsock_remove_connected(vsk); + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -477,8 +454,7 @@ static void vsock_pending_work(struct work_struct *work) * incoming packets can't find this socket, and to reduce the reference * count. */ - if (vsock_in_connected_table(vsk)) - vsock_remove_connected(vsk); + vsock_remove_connected(vsk); sk->sk_state = TCP_CLOSE; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b1694d5d15d3..82be7780bbe8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1280,13 +1280,17 @@ static void xfrm_hash_rebuild(struct work_struct *work) hlist_for_each_entry_safe(policy, n, &net->xfrm.policy_inexact[dir], - bydst_inexact_list) + bydst_inexact_list) { + hlist_del_rcu(&policy->bydst); hlist_del_init(&policy->bydst_inexact_list); + } hmask = net->xfrm.policy_bydst[dir].hmask; odst = net->xfrm.policy_bydst[dir].table; - for (i = hmask; i >= 0; i--) - INIT_HLIST_HEAD(odst + i); + for (i = hmask; i >= 0; i--) { + hlist_for_each_entry_safe(policy, n, odst + i, bydst) + hlist_del_rcu(&policy->bydst); + } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; @@ -1315,8 +1319,6 @@ static void xfrm_hash_rebuild(struct work_struct *work) chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); - hlist_del_rcu(&policy->bydst); - if (!chain) { void *p = xfrm_policy_inexact_insert(policy, dir, 0); diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 71d7fdc513c1..5445943bf07f 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -257,6 +257,29 @@ check_exceptions() return $lret } +check_hthresh_repeat() +{ + local log=$1 + i=0 + + for i in $(seq 1 10);do + ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ns1 xfrm policy set hthresh6 0 28 || break + + ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ns1 xfrm policy set hthresh6 0 28 || break + done + + if [ $i -ne 10 ] ;then + echo "FAIL: $log" 1>&2 + ret=1 + return 1 + fi + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -404,7 +427,9 @@ for n in ns3 ns4;do ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 sleep $((RANDOM%5)) done -check_exceptions "exceptions and block policies after hresh change to normal" +check_exceptions "exceptions and block policies after htresh change to normal" + +check_hthresh_repeat "policies with repeated htresh change" for i in 1 2 3 4;do ip netns del ns$i;done