diff --git a/Makefile b/Makefile index 3f84029f2b31..8f73b417dc1a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 18 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Diseased Newt diff --git a/arch/arm/boot/dts/armada-370-db.dts b/arch/arm/boot/dts/armada-370-db.dts index a495e5821ab8..d5051358fb1b 100644 --- a/arch/arm/boot/dts/armada-370-db.dts +++ b/arch/arm/boot/dts/armada-370-db.dts @@ -102,30 +102,6 @@ broken-cd; }; - pinctrl { - /* - * These pins might be muxed as I2S by - * the bootloader, but it conflicts - * with the real I2S pins that are - * muxed using i2s_pins. We must mux - * those pins to a function other than - * I2S. - */ - pinctrl-0 = <&hog_pins1 &hog_pins2>; - pinctrl-names = "default"; - - hog_pins1: hog-pins1 { - marvell,pins = "mpp6", "mpp8", "mpp10", - "mpp12", "mpp13"; - marvell,function = "gpio"; - }; - - hog_pins2: hog-pins2 { - marvell,pins = "mpp5", "mpp7", "mpp9"; - marvell,function = "gpo"; - }; - }; - usb@50000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 6b3c23b1e138..7513410f7b89 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -106,11 +106,6 @@ reg = <0x11100 0x20>; }; - system-controller@18200 { - compatible = "marvell,armada-370-xp-system-controller"; - reg = <0x18200 0x100>; - }; - pinctrl { compatible = "marvell,mv88f6710-pinctrl"; reg = <0x18000 0x38>; @@ -205,6 +200,11 @@ interrupts = <91>; }; + system-controller@18200 { + compatible = "marvell,armada-370-xp-system-controller"; + reg = <0x18200 0x100>; + }; + gateclk: clock-gating-control@18220 { compatible = "marvell,armada-370-gating-clock"; reg = <0x18220 0x4>; diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 92793ba69c40..d4ebf5679f1f 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -78,6 +78,15 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } +static inline u64 arch_counter_get_cntpct(void) +{ + u64 cval; + + isb(); + asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); + return cval; +} + static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 044b51185fcc..c31f4c00b1fc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -361,25 +361,41 @@ static int coherency_type(void) { struct device_node *np; const struct of_device_id *match; + int type; - np = of_find_matching_node_and_match(NULL, of_coherency_table, &match); - if (np) { - int type = (int) match->data; + /* + * The coherency fabric is needed: + * - For coherency between processors on Armada XP, so only + * when SMP is enabled. + * - For coherency between the processor and I/O devices, but + * this coherency requires many pre-requisites (write + * allocate cache policy, shareable pages, SMP bit set) that + * are only meant in SMP situations. + * + * Note that this means that on Armada 370, there is currently + * no way to use hardware I/O coherency, because even when + * CONFIG_SMP is enabled, is_smp() returns false due to the + * Armada 370 being a single-core processor. To lift this + * limitation, we would have to find a way to make the cache + * policy set to write-allocate (on all Armada SoCs), and to + * set the shareable attribute in page tables (on all Armada + * SoCs except the Armada 370). Unfortunately, such decisions + * are taken very early in the kernel boot process, at a point + * where we don't know yet on which SoC we are running. - /* Armada 370/XP coherency works in both UP and SMP */ - if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) - return type; + */ + if (!is_smp()) + return COHERENCY_FABRIC_TYPE_NONE; - /* Armada 375 coherency works only on SMP */ - else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 && is_smp()) - return type; + np = of_find_matching_node_and_match(NULL, of_coherency_table, &match); + if (!np) + return COHERENCY_FABRIC_TYPE_NONE; - /* Armada 380 coherency works only on SMP */ - else if (type == COHERENCY_FABRIC_TYPE_ARMADA_380 && is_smp()) - return type; - } + type = (int) match->data; + + of_node_put(np); - return COHERENCY_FABRIC_TYPE_NONE; + return type; } int coherency_available(void) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index f5d881b5d0f7..8b2fbc8b6bc6 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -24,7 +24,10 @@ #include .text -/* Returns the coherency base address in r1 (r0 is untouched) */ +/* + * Returns the coherency base address in r1 (r0 is untouched), or 0 if + * the coherency fabric is not enabled. + */ ENTRY(ll_get_coherency_base) mrc p15, 0, r1, c1, c0, 0 tst r1, #CR_M @ Check MMU bit enabled @@ -32,8 +35,13 @@ ENTRY(ll_get_coherency_base) /* * MMU is disabled, use the physical address of the coherency - * base address. + * base address. However, if the coherency fabric isn't mapped + * (i.e its virtual address is zero), it means coherency is + * not enabled, so we return 0. */ + ldr r1, =coherency_base + cmp r1, #0 + beq 2f adr r1, 3f ldr r3, [r1] ldr r1, [r1, r3] @@ -85,6 +93,9 @@ ENTRY(ll_add_cpu_to_smp_group) */ mov r0, lr bl ll_get_coherency_base + /* Bail out if the coherency is not enabled */ + cmp r1, #0 + reteq r0 bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET @@ -107,6 +118,9 @@ ENTRY(ll_enable_coherency) */ mov r0, lr bl ll_get_coherency_base + /* Bail out if the coherency is not enabled */ + cmp r1, #0 + reteq r0 bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET @@ -131,6 +145,9 @@ ENTRY(ll_disable_coherency) */ mov r0, lr bl ll_get_coherency_base + /* Bail out if the coherency is not enabled */ + cmp r1, #0 + reteq r0 bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 7b2baab0f0bd..71be4af5e975 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -51,6 +51,7 @@ ENTRY(tegra_resume) THUMB( it ne ) bne cpu_resume @ no + tegra_get_soc_id TEGRA_APB_MISC_BASE, r6 /* Are we on Tegra20? */ cmp r6, #TEGRA20 beq 1f @ Yes diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f19097134b02..b1fa4e614718 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -104,6 +104,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } +static inline u64 arch_counter_get_cntpct(void) +{ + /* + * AArch64 kernel and user space mandate the use of CNTVCT. + */ + BUG(); + return 0; +} + static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 024c46183c3c..0ad735166d9f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -30,6 +30,7 @@ #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) #define COMPAT_HWCAP2_AES (1 << 0) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2437196cc5d4..f9620154bfb0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -72,7 +72,8 @@ EXPORT_SYMBOL_GPL(elf_hwcap); COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41f1e3e2ea24..edba042b2325 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -60,7 +60,7 @@ struct jit_ctx { const struct bpf_prog *prog; int idx; int tmp_used; - int body_offset; + int epilogue_offset; int *offset; u32 *image; }; @@ -130,8 +130,8 @@ static void jit_fill_hole(void *area, unsigned int size) static inline int epilogue_offset(const struct jit_ctx *ctx) { - int to = ctx->offset[ctx->prog->len - 1]; - int from = ctx->idx - ctx->body_offset; + int to = ctx->epilogue_offset; + int from = ctx->idx; return to - from; } @@ -463,6 +463,8 @@ emit_cond_jmp: } /* function return */ case BPF_JMP | BPF_EXIT: + /* Optimization: when last instruction is EXIT, + simply fallthrough to epilogue. */ if (i == ctx->prog->len - 1) break; jmp_offset = epilogue_offset(ctx); @@ -685,11 +687,13 @@ void bpf_int_jit_compile(struct bpf_prog *prog) /* 1. Initial fake pass to compute ctx->idx. */ - /* Fake pass to fill in ctx->offset. */ + /* Fake pass to fill in ctx->offset and ctx->tmp_used. */ if (build_body(&ctx)) goto out; build_prologue(&ctx); + + ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); /* Now we know the actual image size. */ @@ -706,7 +710,6 @@ void bpf_int_jit_compile(struct bpf_prog *prog) build_prologue(&ctx); - ctx.body_offset = ctx.idx; if (build_body(&ctx)) { bpf_jit_binary_free(header); goto out; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ca38139423ae..437e61159279 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 64dc362506b7..201b520521ed 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -78,6 +78,7 @@ static inline void __exit exit_amd_microcode(void) {} extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); extern int __init save_microcode_in_initrd(void); +void reload_early_microcode(void); #else static inline void __init load_ucode_bsp(void) {} static inline void load_ucode_ap(void) {} @@ -85,6 +86,7 @@ static inline int __init save_microcode_in_initrd(void) { return 0; } +static inline void reload_early_microcode(void) {} #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index b7b10b82d3e5..af935397e053 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -59,7 +59,7 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, extern int __apply_microcode_amd(struct microcode_amd *mc_amd); extern int apply_microcode_amd(int cpu); -extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); +extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); #define PATCH_MAX_SIZE PAGE_SIZE extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -68,10 +68,12 @@ extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; extern void __init load_ucode_amd_bsp(void); extern void load_ucode_amd_ap(void); extern int __init save_microcode_in_initrd_amd(void); +void reload_ucode_amd(void); #else static inline void __init load_ucode_amd_bsp(void) {} static inline void load_ucode_amd_ap(void) {} static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; } +void reload_ucode_amd(void) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index bbe296e0bce1..dd4c20043ce7 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -68,11 +68,13 @@ extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); extern void show_ucode_info_early(void); extern int __init save_microcode_in_initrd_intel(void); +void reload_ucode_intel(void); #else static inline __init void load_ucode_intel_bsp(void) {} static inline void load_ucode_intel_ap(void) {} static inline void show_ucode_info_early(void) {} static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } +static inline void reload_ucode_intel(void) {} #endif #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb37bfe..6e1aaf73852a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. + */ unsigned int lm:1; #endif }; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 8fffd845e22b..bfbbe6195e2d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -376,7 +376,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, return UCODE_OK; } -enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) +enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -390,8 +390,8 @@ enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) #if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) /* save BSP's matching patch for early load */ - if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(smp_processor_id()); + if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { + struct ucode_patch *p = find_patch(cpu); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -444,7 +444,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(c->x86, fw->data, fw->size); + ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 06674473b0e6..737737edbd1e 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -389,7 +389,7 @@ int __init save_microcode_in_initrd_amd(void) eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - ret = load_microcode_amd(eax, container, container_size); + ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); if (ret != UCODE_OK) retval = -EINVAL; @@ -402,3 +402,21 @@ int __init save_microcode_in_initrd_amd(void) return retval; } + +void reload_ucode_amd(void) +{ + struct microcode_amd *mc; + u32 rev, eax; + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); + + mc = (struct microcode_amd *)amd_ucode_patch; + + if (mc && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + ucode_new_rev = mc->hdr.patch_id; + pr_info("microcode: reload patch_level=0x%08x\n", + ucode_new_rev); + } + } +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 08fe6e8a726e..15c29096136b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -465,16 +465,8 @@ static void mc_bp_resume(void) if (uci->valid && uci->mc) microcode_ops->apply_microcode(cpu); -#ifdef CONFIG_X86_64 else if (!uci->mc) - /* - * We might resume and not have applied late microcode but still - * have a newer patch stashed from the early loader. We don't - * have it in uci->mc so we have to load it the same way we're - * applying patches early on the APs. - */ - load_ucode_ap(); -#endif + reload_early_microcode(); } static struct syscore_ops mc_syscore_ops = { @@ -559,7 +551,7 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &cpu_data(0); int error; - if (dis_ucode_ldr) + if (paravirt_enabled() || dis_ucode_ldr) return 0; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c index 2c017f242a78..d45df4bd16ab 100644 --- a/arch/x86/kernel/cpu/microcode/core_early.c +++ b/arch/x86/kernel/cpu/microcode/core_early.c @@ -176,3 +176,24 @@ int __init save_microcode_in_initrd(void) return 0; } + +void reload_early_microcode(void) +{ + int vendor, x86; + + vendor = x86_vendor(); + x86 = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (x86 >= 6) + reload_ucode_intel(); + break; + case X86_VENDOR_AMD: + if (x86 >= 0x10) + reload_ucode_amd(); + break; + default: + break; + } +} diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index b88343f7a3b3..ec9df6f9cd47 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -650,8 +650,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci) } #endif -static int apply_microcode_early(struct mc_saved_data *mc_saved_data, - struct ucode_cpu_info *uci) +static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) { struct microcode_intel *mc_intel; unsigned int val[2]; @@ -680,7 +679,10 @@ static int apply_microcode_early(struct mc_saved_data *mc_saved_data, #endif uci->cpu_sig.rev = val[1]; - print_ucode(uci); + if (early) + print_ucode(uci); + else + print_ucode_info(uci, mc_intel->hdr.date); return 0; } @@ -715,12 +717,17 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, unsigned long initrd_end_early, struct ucode_cpu_info *uci) { + enum ucode_state ret; + collect_cpu_info_early(uci); scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, mc_saved_in_initrd, uci); - load_microcode(mc_saved_data, mc_saved_in_initrd, - initrd_start_early, uci); - apply_microcode_early(mc_saved_data, uci); + + ret = load_microcode(mc_saved_data, mc_saved_in_initrd, + initrd_start_early, uci); + + if (ret == UCODE_OK) + apply_microcode_early(uci, true); } void __init @@ -749,7 +756,8 @@ load_ucode_intel_bsp(void) initrd_end_early = initrd_start_early + ramdisk_size; _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, - initrd_start_early, initrd_end_early, &uci); + initrd_start_early, initrd_end_early, + &uci); #endif } @@ -783,5 +791,23 @@ void load_ucode_intel_ap(void) collect_cpu_info_early(&uci); load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, initrd_start_addr, &uci); - apply_microcode_early(mc_saved_data_p, &uci); + apply_microcode_early(&uci, true); +} + +void reload_ucode_intel(void) +{ + struct ucode_cpu_info uci; + enum ucode_state ret; + + if (!mc_saved_data.mc_saved_count) + return; + + collect_cpu_info_early(&uci); + + ret = generic_load_microcode_early(mc_saved_data.mc_saved, + mc_saved_data.mc_saved_count, &uci); + if (ret != UCODE_OK) + return; + + apply_microcode_early(&uci, false); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f6945bef2cd1..94f643484300 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d9156ceecdff..a2de9bc7ac0b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -263,7 +263,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3ed4a68d4013..5a2c02913af3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) fpu = switch_fpu_prepare(prev_p, next_p, cpu); - /* - * Reload esp0, LDT and the page table pointer: - */ + /* Reload esp0 and ss1. */ load_sp0(tss, next); - /* - * Switch DS and ES. - * This won't pick up thread selector changes, but I guess that is ok. - */ - savesegment(es, prev->es); - if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); - - savesegment(ds, prev->ds); - if (unlikely(next->ds | prev->ds)) - loadsegment(ds, next->ds); - - /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). * @@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) savesegment(fs, fsindex); savesegment(gs, gsindex); + /* + * Load TLS before restoring any segments so that segment loads + * reference the correct GDT entries. + */ load_TLS(next, cpu); /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. + * Leave lazy mode, flushing any hypercalls made here. This + * must be done after loading TLS entries in the GDT but before + * loading segments that might reference them, and and it must + * be done before math_state_restore, so the TS bit is up to + * date. */ arch_end_context_switch(next_p); + /* Switch DS and ES. + * + * Reading them only returns the selectors, but writing them (if + * nonzero) loads the full descriptor from the GDT or LDT. The + * LDT for next is loaded in switch_mm, and the GDT is loaded + * above. + * + * We therefore need to write new values to the segment + * registers on every context switch unless both the new and old + * values are zero. + * + * Note that we don't need to do anything for CS and SS, as + * those are saved and restored as part of pt_regs. + */ + savesegment(es, prev->es); + if (unlikely(next->es | prev->es)) + loadsegment(es, next->es); + + savesegment(ds, prev->ds); + if (unlikely(next->ds | prev->ds)) + loadsegment(ds, next->ds); + /* * Switch FS and GS. * - * Segment register != 0 always requires a reload. Also - * reload when it has changed. When prev process used 64bit - * base always reload to avoid an information leak. + * These are even more complicated than FS and GS: they have + * 64-bit bases are that controlled by arch_prctl. Those bases + * only differ from the values in the GDT or LDT if the selector + * is 0. + * + * Loading the segment register resets the hidden base part of + * the register to 0 or the value from the GDT / LDT. If the + * next base address zero, writing 0 to the segment register is + * much faster than using wrmsr to explicitly zero the base. + * + * The thread_struct.fs and thread_struct.gs values are 0 + * if the fs and gs bases respectively are not overridden + * from the values implied by fsindex and gsindex. They + * are nonzero, and store the nonzero base addresses, if + * the bases are overridden. + * + * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should + * be impossible. + * + * Therefore we need to reload the segment registers if either + * the old or new selector is nonzero, and we need to override + * the base address if next thread expects it to be overridden. + * + * This code is unnecessarily slow in the case where the old and + * new indexes are zero and the new base is nonzero -- it will + * unnecessarily write 0 to the selector before writing the new + * base address. + * + * Note: This all depends on arch_prctl being the only way that + * user code can override the segment base. Once wrfsbase and + * wrgsbase are enabled, most of this code will need to change. */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); + /* - * Check if the user used a selector != 0; if yes - * clear 64bit base, since overloaded base is always - * mapped to the Null selector + * If user code wrote a nonzero value to FS, then it also + * cleared the overridden base address. + * + * XXX: if user code wrote 0 to FS and cleared the base + * address itself, we won't notice and we'll incorrectly + * restore the prior base address next time we reschdule + * the process. */ if (fsindex) prev->fs = 0; } - /* when next process has a 64bit base use it */ if (next->fs) wrmsrl(MSR_FS_BASE, next->fs); prev->fsindex = fsindex; if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); + + /* This works (and fails) the same way as fsindex above. */ if (gsindex) prev->gs = 0; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index f7fec09e3e3a..4e942f31b1a7 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -27,6 +27,37 @@ static int get_free_idx(void) return -ESRCH; } +static bool tls_desc_okay(const struct user_desc *info) +{ + if (LDT_empty(info)) + return true; + + /* + * espfix is required for 16-bit data segments, but espfix + * only works for LDT segments. + */ + if (!info->seg_32bit) + return false; + + /* Only allow data segments in the TLS array. */ + if (info->contents > 1) + return false; + + /* + * Non-present segments with DPL 3 present an interesting attack + * surface. The kernel should handle such segments correctly, + * but TLS is very difficult to protect in a sandbox, so prevent + * such segments from being created. + * + * If userspace needs to remove a TLS entry, it can still delete + * it outright. + */ + if (info->seg_not_present) + return false; + + return true; +} + static void set_tls_desc(struct task_struct *p, int idx, const struct user_desc *info, int n) { @@ -66,6 +97,9 @@ int do_set_thread_area(struct task_struct *p, int idx, if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + if (!tls_desc_okay(&info)) + return -EINVAL; + if (idx == -1) idx = info.entry_number; @@ -192,6 +226,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, { struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; + int i; if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || @@ -205,6 +240,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, else info = infobuf; + for (i = 0; i < count / sizeof(struct user_desc); i++) + if (!tls_desc_okay(info + i)) + return -EINVAL; + set_tls_desc(target, GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), info, count / sizeof(struct user_desc)); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index de801f22128a..07ab8e9733c5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -387,7 +387,7 @@ NOKPROBE_SYMBOL(do_int3); * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -413,7 +413,7 @@ struct bad_iret_stack { struct pt_regs regs; }; -asmlinkage __visible +asmlinkage __visible notrace struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) { /* @@ -436,6 +436,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) BUG_ON(!user_mode_vm(&new_stack->regs)); return new_stack; } +NOKPROBE_SYMBOL(fixup_bad_iret); #endif /* diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 6a3ad8011585..1de4beeb25f8 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -449,6 +449,9 @@ void af_alg_complete(struct crypto_async_request *req, int err) { struct af_alg_completion *completion = req->data; + if (err == -EINPROGRESS) + return; + completion->err = err; complete(&completion->completion); } diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 43005d4d3348..1fa2af957b18 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -462,7 +462,10 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type & ARCH_CP15_TIMER) { - arch_timer_read_counter = arch_counter_get_cntvct; + if (arch_timer_use_virtual) + arch_timer_read_counter = arch_counter_get_cntvct; + else + arch_timer_read_counter = arch_counter_get_cntpct; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index ce023fa3e8ae..ab9a4539a446 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -259,16 +259,12 @@ void tegra_bo_free_object(struct drm_gem_object *gem) int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm, struct drm_mode_create_dumb *args) { - int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + unsigned int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); struct tegra_drm *tegra = drm->dev_private; struct tegra_bo *bo; - min_pitch = round_up(min_pitch, tegra->pitch_align); - if (args->pitch < min_pitch) - args->pitch = min_pitch; - - if (args->size < args->pitch * args->height) - args->size = args->pitch * args->height; + args->pitch = round_up(min_pitch, tegra->pitch_align); + args->size = args->pitch * args->height; bo = tegra_bo_create_with_handle(file, drm, args->size, 0, &args->handle); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index afe79719ea32..ecbd3ffcf359 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -532,6 +532,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, end_io(&b->bio, r); } +static void inline_endio(struct bio *bio, int error) +{ + bio_end_io_t *end_fn = bio->bi_private; + + /* + * Reset the bio to free any attached resources + * (e.g. bio integrity profiles). + */ + bio_reset(bio); + + end_fn(bio, error); +} + static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, bio_end_io_t *end_io) { @@ -543,7 +556,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; - b->bio.bi_end_io = end_io; + b->bio.bi_end_io = inline_endio; + /* + * Use of .bi_private isn't a problem here because + * the dm_buffer's inline bio is local to bufio. + */ + b->bio.bi_private = end_io; /* * We assume that if len >= PAGE_SIZE ptr is page-aligned. diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7130505c2425..da496cfb458d 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -951,10 +951,14 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) } } else { - clear_dirty(cache, mg->new_oblock, mg->cblock); - if (mg->requeue_holder) + if (mg->requeue_holder) { + clear_dirty(cache, mg->new_oblock, mg->cblock); cell_defer(cache, mg->new_ocell, true); - else { + } else { + /* + * The block was promoted via an overwrite, so it's dirty. + */ + set_dirty(cache, mg->new_oblock, mg->cblock); bio_endio(mg->new_ocell->holder, 0); cell_defer(cache, mg->new_ocell, false); } @@ -1070,7 +1074,8 @@ static void issue_copy(struct dm_cache_migration *mg) avoid = is_discarded_oblock(cache, mg->new_oblock); - if (!avoid && bio_writes_complete_block(cache, bio)) { + if (writeback_mode(&cache->features) && + !avoid && bio_writes_complete_block(cache, bio)) { issue_overwrite(mg, bio); return; } @@ -2549,11 +2554,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso static int cache_map(struct dm_target *ti, struct bio *bio) { int r; - struct dm_bio_prison_cell *cell; + struct dm_bio_prison_cell *cell = NULL; struct cache *cache = ti->private; r = __cache_map(cache, bio, &cell); - if (r == DM_MAPIO_REMAPPED) { + if (r == DM_MAPIO_REMAPPED && cell) { inc_ds(cache, bio, cell); cell_defer(cache, cell, false); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index fc93b9330af4..08981be7baa1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -705,7 +705,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) crypto_xor(data + i * 8, buf, 8); out: - memset(buf, 0, sizeof(buf)); + memzero_explicit(buf, sizeof(buf)); return r; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 0f86d802b533..aae19133cfac 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -990,6 +990,24 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, schedule_zero(tc, virt_block, data_dest, cell, bio); } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + +static void check_for_space(struct pool *pool) +{ + int r; + dm_block_t nr_free; + + if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE) + return; + + r = dm_pool_get_free_block_count(pool->pmd, &nr_free); + if (r) + return; + + if (nr_free) + set_pool_mode(pool, PM_WRITE); +} + /* * A non-zero return indicates read_only or fail_io mode. * Many callers don't care about the return value. @@ -1004,6 +1022,8 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); + else + check_for_space(pool); return r; } @@ -1022,8 +1042,6 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } -static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); - static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -1824,7 +1842,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_bio = process_bio_read_only; pool->process_discard = process_discard; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard; if (!pool->pf.error_if_no_space && no_space_timeout) queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout); @@ -3248,14 +3266,14 @@ static void thin_dtr(struct dm_target *ti) struct thin_c *tc = ti->private; unsigned long flags; - thin_put(tc); - wait_for_completion(&tc->can_destroy); - spin_lock_irqsave(&tc->pool->lock, flags); list_del_rcu(&tc->list); spin_unlock_irqrestore(&tc->pool->lock, flags); synchronize_rcu(); + thin_put(tc); + wait_for_completion(&tc->can_destroy); + mutex_lock(&dm_thin_pool_table.mutex); __pool_dec(tc->pool); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 786b689bdfc7..f4e22bcc7fb8 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -564,7 +564,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - return smm->ll.nr_blocks; + *count = smm->ll.nr_blocks; + + return 0; } static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 4fac16bcd732..0afddf6c37af 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -263,6 +263,17 @@ static int tc6393xb_ohci_disable(struct platform_device *dev) return 0; } +static int tc6393xb_ohci_suspend(struct platform_device *dev) +{ + struct tc6393xb_platform_data *tcpd = dev_get_platdata(dev->dev.parent); + + /* We can't properly store/restore OHCI state, so fail here */ + if (tcpd->resume_restore) + return -EBUSY; + + return tc6393xb_ohci_disable(dev); +} + static int tc6393xb_fb_enable(struct platform_device *dev) { struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent); @@ -403,7 +414,7 @@ static struct mfd_cell tc6393xb_cells[] = { .num_resources = ARRAY_SIZE(tc6393xb_ohci_resources), .resources = tc6393xb_ohci_resources, .enable = tc6393xb_ohci_enable, - .suspend = tc6393xb_ohci_disable, + .suspend = tc6393xb_ohci_suspend, .resume = tc6393xb_ohci_enable, .disable = tc6393xb_ohci_disable, }, diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 50f9091bcd38..7d63e324e6a8 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -831,6 +831,9 @@ static struct twl4030_power_data osc_off_idle = { static struct of_device_id twl4030_power_of_match[] = { { + .compatible = "ti,twl4030-power", + }, + { .compatible = "ti,twl4030-power-reset", .data = &omap3_reset, }, diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1fa4c80ff886..a11451f4f408 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -260,7 +260,7 @@ static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, int ret; struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); - ret = snprintf(buf, PAGE_SIZE, "%d", + ret = snprintf(buf, PAGE_SIZE, "%d\n", get_disk_ro(dev_to_disk(dev)) ^ md->read_only); mmc_blk_put(md); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 69f0cc68d5b2..f7c95abc8c11 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -626,6 +626,13 @@ static void dw_mci_ctrl_rd_thld(struct dw_mci *host, struct mmc_data *data) WARN_ON(!(data->flags & MMC_DATA_READ)); + /* + * CDTHRCTL doesn't exist prior to 240A (in fact that register offset is + * in the FIFO region, so we really shouldn't access it). + */ + if (host->verid < DW_MMC_240A) + return; + if (host->timing != MMC_TIMING_MMC_HS200 && host->timing != MMC_TIMING_UHS_SDR104) goto disable; diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index df27bb4fc098..9c2b9cbcbce0 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -609,6 +609,7 @@ static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host) */ if ((mmc_slot(host).features & HSMMC_HAS_HSPE_SUPPORT) && (ios->timing != MMC_TIMING_MMC_DDR52) && + (ios->timing != MMC_TIMING_UHS_DDR50) && ((OMAP_HSMMC_READ(host->base, CAPA) & HSS) == HSS)) { regval = OMAP_HSMMC_READ(host->base, HCTL); if (clkdiv && (clk_get_rate(host->fclk)/clkdiv) > 25000000) @@ -628,7 +629,8 @@ static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host) u32 con; con = OMAP_HSMMC_READ(host->base, CON); - if (ios->timing == MMC_TIMING_MMC_DDR52) + if (ios->timing == MMC_TIMING_MMC_DDR52 || + ios->timing == MMC_TIMING_UHS_DDR50) con |= DDR; /* configure in DDR mode */ else con &= ~DDR; diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 5670e381b0cf..e2ec108dba0e 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -127,8 +127,6 @@ void sdhci_pci_o2_fujin2_pci_init(struct sdhci_pci_chip *chip) return; scratch_32 &= ~((1 << 21) | (1 << 30)); - /* Set RTD3 function disabled */ - scratch_32 |= ((1 << 29) | (1 << 28)); pci_write_config_dword(chip->pdev, O2_SD_FUNC_REG3, scratch_32); /* Set L1 Entrance Timer */ diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c index 11cc051f97cd..8079a9ddcba9 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c @@ -1355,6 +1355,7 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr) } INIT_WORK(&msgbuf->txflow_work, brcmf_msgbuf_txflow_worker); count = BITS_TO_LONGS(if_msgbuf->nrof_flowrings); + count = count * sizeof(unsigned long); msgbuf->flow_map = kzalloc(count, GFP_KERNEL); if (!msgbuf->flow_map) goto fail; diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 4f730af70e7c..30e8d7ad5813 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -283,6 +283,14 @@ static int anatop_regulator_probe(struct platform_device *pdev) sreg->sel = 0; sreg->bypass = true; } + + /* + * In case vddpu was disabled by the bootloader, we need to set + * a sane default until imx6-cpufreq was probed and changes the + * voltage to the correct value. In this case we set 1.25V. + */ + if (!sreg->sel && !strcmp(sreg->name, "vddpu")) + sreg->sel = 22; } else { rdesc->ops = &anatop_rops; } diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 45da3c823322..ab1c09eaa5b8 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2647,14 +2647,14 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) { * * Purpose : abort a command * - * Inputs : cmd - the Scsi_Cmnd to abort, code - code to set the - * host byte of the result field to, if zero DID_ABORTED is + * Inputs : cmd - the Scsi_Cmnd to abort, code - code to set the + * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * - * XXX - there is no way to abort the command that is currently - * connected, you have to wait for it to complete. If this is + * XXX - there is no way to abort the command that is currently + * connected, you have to wait for it to complete. If this is * a problem, we could implement longjmp() / setjmp(), setjmp() * called where the loop started in NCR5380_main(). * @@ -2704,7 +2704,7 @@ static int NCR5380_abort(Scsi_Cmnd * cmd) { * aborted flag and get back into our main loop. */ - return 0; + return SUCCESS; } #endif diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c index 5f3101797c93..31ace4bef8fe 100644 --- a/drivers/scsi/aha1740.c +++ b/drivers/scsi/aha1740.c @@ -531,7 +531,7 @@ static int aha1740_eh_abort_handler (Scsi_Cmnd *dummy) * quiet as possible... */ - return 0; + return SUCCESS; } static struct scsi_host_template aha1740_template = { diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c index 79e6f045c2a9..e3bbc0a0f9f1 100644 --- a/drivers/scsi/atari_NCR5380.c +++ b/drivers/scsi/atari_NCR5380.c @@ -2607,7 +2607,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance) * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * * XXX - there is no way to abort the command that is currently * connected, you have to wait for it to complete. If this is diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c index 6504a195c874..45aa684f8b74 100644 --- a/drivers/scsi/esas2r/esas2r_main.c +++ b/drivers/scsi/esas2r/esas2r_main.c @@ -1057,7 +1057,7 @@ int esas2r_eh_abort(struct scsi_cmnd *cmd) cmd->scsi_done(cmd); - return 0; + return SUCCESS; } spin_lock_irqsave(&a->queue_lock, flags); diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index ac5d94cfd52f..2485255f3414 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -1945,7 +1945,7 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) cmd->device->id, (u32)cmd->device->lun); if(list_empty(&adapter->pending_list)) - return FALSE; + return FAILED; list_for_each_safe(pos, next, &adapter->pending_list) { @@ -1968,7 +1968,7 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) (aor==SCB_ABORT) ? "ABORTING":"RESET", scb->idx); - return FALSE; + return FAILED; } else { @@ -1993,12 +1993,12 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) list_add_tail(SCSI_LIST(cmd), &adapter->completed_list); - return TRUE; + return SUCCESS; } } } - return FALSE; + return FAILED; } static inline int diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 5640ad1c8214..5e881e5e67b6 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1008,7 +1008,7 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, cpu_to_le32(upper_32_bits(cmd_to_abort->frame_phys_addr)); cmd->sync_cmd = 1; - cmd->cmd_status = 0xFF; + cmd->cmd_status = ENODATA; instance->instancet->issue_dcmd(instance, cmd); diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 685e6f391fe4..0f66d0ef0b26 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -183,14 +183,15 @@ void MR_PopulateDrvRaidMap(struct megasas_instance *instance) /* New Raid map will not set totalSize, so keep expected value * for legacy code in ValidateMapInfo */ - pDrvRaidMap->totalSize = sizeof(struct MR_FW_RAID_MAP_EXT); + pDrvRaidMap->totalSize = + cpu_to_le32(sizeof(struct MR_FW_RAID_MAP_EXT)); } else { fw_map_old = (struct MR_FW_RAID_MAP_ALL *) fusion->ld_map[(instance->map_id & 1)]; pFwRaidMap = &fw_map_old->raidMap; #if VD_EXT_DEBUG - for (i = 0; i < pFwRaidMap->ldCount; i++) { + for (i = 0; i < le16_to_cpu(pFwRaidMap->ldCount); i++) { dev_dbg(&instance->pdev->dev, "(%d) :Index 0x%x " "Target Id 0x%x Seq Num 0x%x Size 0/%llx\n", instance->unique_id, i, @@ -202,12 +203,12 @@ void MR_PopulateDrvRaidMap(struct megasas_instance *instance) memset(drv_map, 0, fusion->drv_map_sz); pDrvRaidMap->totalSize = pFwRaidMap->totalSize; - pDrvRaidMap->ldCount = pFwRaidMap->ldCount; + pDrvRaidMap->ldCount = (__le16)pFwRaidMap->ldCount; pDrvRaidMap->fpPdIoTimeoutSec = pFwRaidMap->fpPdIoTimeoutSec; for (i = 0; i < MAX_RAIDMAP_LOGICAL_DRIVES + MAX_RAIDMAP_VIEWS; i++) pDrvRaidMap->ldTgtIdToLd[i] = (u8)pFwRaidMap->ldTgtIdToLd[i]; - for (i = 0; i < pDrvRaidMap->ldCount; i++) { + for (i = 0; i < le16_to_cpu(pDrvRaidMap->ldCount); i++) { pDrvRaidMap->ldSpanMap[i] = pFwRaidMap->ldSpanMap[i]; #if VD_EXT_DEBUG dev_dbg(&instance->pdev->dev, @@ -268,7 +269,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance) else expected_size = (sizeof(struct MR_FW_RAID_MAP) - sizeof(struct MR_LD_SPAN_MAP) + - (sizeof(struct MR_LD_SPAN_MAP) * le32_to_cpu(pDrvRaidMap->ldCount))); + (sizeof(struct MR_LD_SPAN_MAP) * le16_to_cpu(pDrvRaidMap->ldCount))); if (le32_to_cpu(pDrvRaidMap->totalSize) != expected_size) { dev_err(&instance->pdev->dev, "map info structure size 0x%x is not matching with ld count\n", @@ -284,7 +285,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance) mr_update_load_balance_params(drv_map, lbInfo); - num_lds = le32_to_cpu(drv_map->raidMap.ldCount); + num_lds = le16_to_cpu(drv_map->raidMap.ldCount); /*Convert Raid capability values to CPU arch */ for (ldCount = 0; ldCount < num_lds; ldCount++) { @@ -457,7 +458,7 @@ u32 mr_spanset_get_span_block(struct megasas_instance *instance, quad = &map->raidMap.ldSpanMap[ld]. spanBlock[span]. block_span_info.quad[info]; - if (le32_to_cpu(quad->diff == 0)) + if (le32_to_cpu(quad->diff) == 0) return SPAN_INVALID; if (le64_to_cpu(quad->logStart) <= row && row <= le64_to_cpu(quad->logEnd) && @@ -520,7 +521,7 @@ static u64 get_row_from_strip(struct megasas_instance *instance, span_set->span_row_data_width) * span_set->diff; for (span = 0, span_offset = 0; span < raid->spanDepth; span++) if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements >= info+1)) { + block_span_info.noElements) >= info+1) { if (strip_offset >= span_set->strip_offset[span]) span_offset++; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index f37eed682c75..9d9c27cd4687 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -880,7 +880,7 @@ megasas_sync_map_info(struct megasas_instance *instance) map = fusion->ld_drv_map[instance->map_id & 1]; - num_lds = le32_to_cpu(map->raidMap.ldCount); + num_lds = le16_to_cpu(map->raidMap.ldCount); dcmd = &cmd->frame->dcmd; @@ -1173,9 +1173,10 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance, struct megasas_register_set __iomem *regs) { #if defined(writeq) && defined(CONFIG_64BIT) - u64 req_data = (((u64)req_desc_hi << 32) | (u32)req_desc_lo); + u64 req_data = (((u64)le32_to_cpu(req_desc_hi) << 32) | + le32_to_cpu(req_desc_lo)); - writeq(le64_to_cpu(req_data), &(regs)->inbound_low_queue_port); + writeq(req_data, &(regs)->inbound_low_queue_port); #else unsigned long flags; @@ -1373,7 +1374,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, /* Logical block reference tag */ io_request->CDB.EEDP32.PrimaryReferenceTag = cpu_to_be32(ref_tag); - io_request->CDB.EEDP32.PrimaryApplicationTagMask = 0xffff; + io_request->CDB.EEDP32.PrimaryApplicationTagMask = cpu_to_be16(0xffff); io_request->IoFlags = cpu_to_le16(32); /* Specify 32-byte cdb */ /* Transfer length */ @@ -1769,7 +1770,7 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, /* set RAID context values */ pRAID_Context->regLockFlags = REGION_TYPE_SHARED_READ; - pRAID_Context->timeoutValue = raid->fpIoTimeoutForLd; + pRAID_Context->timeoutValue = cpu_to_le16(raid->fpIoTimeoutForLd); pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); pRAID_Context->regLockRowLBA = 0; pRAID_Context->regLockLength = 0; @@ -2254,7 +2255,7 @@ build_mpt_mfi_pass_thru(struct megasas_instance *instance, * megasas_complete_cmd */ - if (frame_hdr->flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE) + if (frame_hdr->flags & cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE)) cmd->flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; fusion = instance->ctrl_context; diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index 1a2367a1b1f2..6d248a299bc4 100644 --- a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -2590,15 +2590,15 @@ static void NCR5380_reselect (struct Scsi_Host *instance) * Purpose : abort a command * * Inputs : cmd - the struct scsi_cmnd to abort, code - code to set the - * host byte of the result field to, if zero DID_ABORTED is + * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * - * XXX - there is no way to abort the command that is currently - * connected, you have to wait for it to complete. If this is + * XXX - there is no way to abort the command that is currently + * connected, you have to wait for it to complete. If this is * a problem, we could implement longjmp() / setjmp(), setjmp() - * called where the loop started in NCR5380_main(). + * called where the loop started in NCR5380_main(). */ static int NCR5380_abort(struct scsi_cmnd *cmd) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 43b90709585f..488e9bfd996b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1835,10 +1835,10 @@ static int __init thermal_init(void) exit_netlink: genetlink_exit(); -unregister_governors: - thermal_unregister_governors(); unregister_class: class_unregister(&thermal_class); +unregister_governors: + thermal_unregister_governors(); error: idr_destroy(&thermal_tz_idr); idr_destroy(&thermal_cdev_idr); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1bf9f897065d..97676731190c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4106,12 +4106,6 @@ again: if (ret) break; - /* opt_discard */ - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_error_discard_extent(root, start, - end + 1 - start, - NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); @@ -4129,6 +4123,25 @@ again: return 0; } +static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_ordered_extent *ordered; + + spin_lock(&fs_info->trans_lock); + while (!list_empty(&cur_trans->pending_ordered)) { + ordered = list_first_entry(&cur_trans->pending_ordered, + struct btrfs_ordered_extent, + trans_list); + list_del_init(&ordered->trans_list); + spin_unlock(&fs_info->trans_lock); + + btrfs_put_ordered_extent(ordered); + spin_lock(&fs_info->trans_lock); + } + spin_unlock(&fs_info->trans_lock); +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { @@ -4140,6 +4153,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); + btrfs_free_pending_ordered(cur_trans, root->fs_info); btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47c1ba141082..4bd5e06fa5ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5715,7 +5715,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, update_global_block_rsv(fs_info); } -static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, + const bool return_free_space) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_group_cache *cache = NULL; @@ -5739,7 +5740,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (start < cache->last_byte_to_unpin) { len = min(len, cache->last_byte_to_unpin - start); - btrfs_add_free_space(cache, start, len); + if (return_free_space) + btrfs_add_free_space(cache, start, len); } start += len; @@ -5803,7 +5805,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); - unpin_extent_range(root, start, end); + unpin_extent_range(root, start, end, true); cond_resched(); } @@ -9585,7 +9587,7 @@ out: int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - return unpin_extent_range(root, start, end); + return unpin_extent_range(root, start, end, false); } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 225302b39afb..6a98bddd8f33 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -287,8 +287,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, if (!em) goto out; - if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) - list_move(&em->list, &tree->modified_extents); em->generation = gen; clear_bit(EXTENT_FLAG_PINNED, &em->flags); em->mod_start = em->start; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ac734ec4cc20..269e21dd1506 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, INIT_LIST_HEAD(&entry->work_list); init_completion(&entry->completion); INIT_LIST_HEAD(&entry->log_list); + INIT_LIST_HEAD(&entry->trans_list); trace_btrfs_ordered_extent_add(inode, entry); @@ -443,6 +444,8 @@ void btrfs_get_logged_extents(struct inode *inode, ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); if (!list_empty(&ordered->log_list)) continue; + if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) + continue; list_add_tail(&ordered->log_list, logged_list); atomic_inc(&ordered->refs); } @@ -472,7 +475,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list, spin_unlock_irq(&log->log_extents_lock[index]); } -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *log, u64 transid) { struct btrfs_ordered_extent *ordered; int index = transid % 2; @@ -497,7 +501,8 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); - btrfs_put_ordered_extent(ordered); + if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) + list_add_tail(&ordered->trans_list, &trans->ordered); spin_lock_irq(&log->log_extents_lock[index]); } spin_unlock_irq(&log->log_extents_lock[index]); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index d81a274d621e..0124bffc775f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -71,6 +71,8 @@ struct btrfs_ordered_sum { ordered extent */ #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ +#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent + * in the logging code. */ struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -121,6 +123,9 @@ struct btrfs_ordered_extent { /* If we need to wait on this to be done */ struct list_head log_list; + /* If the transaction needs to wait on this ordered extent */ + struct list_head trans_list; + /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ wait_queue_head_t wait; @@ -197,7 +202,8 @@ void btrfs_get_logged_extents(struct inode *inode, void btrfs_put_logged_extents(struct list_head *logged_list); void btrfs_submit_logged_extents(struct list_head *logged_list, struct btrfs_root *log); -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *log, u64 transid); void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); int __init ordered_data_init(void); void ordered_data_exit(void); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 54bd91ece35b..cde9c03e3913 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1824,7 +1824,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree -= block_rsv->size >> bits; spin_unlock(&block_rsv->lock); - buf->f_bavail = total_free_data; + buf->f_bavail = div_u64(total_free_data, factor); ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); if (ret) { mutex_unlock(&fs_info->chunk_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dcaae3616728..63c6d05950f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -220,6 +220,7 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->switch_commits); + INIT_LIST_HEAD(&cur_trans->pending_ordered); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -488,6 +489,7 @@ again: h->sync = false; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); + INIT_LIST_HEAD(&h->ordered); smp_mb(); if (cur_trans->state >= TRANS_STATE_BLOCKED && @@ -719,6 +721,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); + if (!list_empty(&trans->ordered)) { + spin_lock(&info->trans_lock); + list_splice(&trans->ordered, &cur_trans->pending_ordered); + spin_unlock(&info->trans_lock); + } + trans->delayed_ref_updates = 0; if (!trans->sync) { must_run_delayed_refs = @@ -1652,6 +1660,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) btrfs_wait_ordered_roots(fs_info, -1); } +static inline void +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_ordered_extent *ordered; + + spin_lock(&fs_info->trans_lock); + while (!list_empty(&cur_trans->pending_ordered)) { + ordered = list_first_entry(&cur_trans->pending_ordered, + struct btrfs_ordered_extent, + trans_list); + list_del_init(&ordered->trans_list); + spin_unlock(&fs_info->trans_lock); + + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, + &ordered->flags)); + btrfs_put_ordered_extent(ordered); + spin_lock(&fs_info->trans_lock); + } + spin_unlock(&fs_info->trans_lock); +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -1702,6 +1732,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); + list_splice(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); @@ -1754,6 +1785,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_wait_delalloc_flush(root->fs_info); + btrfs_wait_pending_ordered(cur_trans, root->fs_info); + btrfs_scrub_pause(root); /* * Ok now we need to make sure to block out any other joins while we diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d8f40e1a5d2d..1ba9c3e04191 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -56,6 +56,7 @@ struct btrfs_transaction { wait_queue_head_t commit_wait; struct list_head pending_snapshots; struct list_head pending_chunks; + struct list_head pending_ordered; struct list_head switch_commits; struct btrfs_delayed_ref_root delayed_refs; int aborted; @@ -105,6 +106,7 @@ struct btrfs_trans_handle { */ struct btrfs_root *root; struct seq_list delayed_ref_elem; + struct list_head ordered; struct list_head qgroup_ref_list; struct list_head new_bgs; }; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 286213cec861..7d96cc961663 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2600,9 +2600,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (atomic_read(&log_root_tree->log_commit[index2])) { blk_finish_plug(&plug); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_wait_logged_extents(trans, log, log_transid); wait_log_commit(trans, log_root_tree, root_log_ctx.log_transid); - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; @@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_wait_marked_extents(log_root_tree, &log_root_tree->dirty_log_pages, EXTENT_NEW | EXTENT_DIRTY); - btrfs_wait_logged_extents(log, log_transid); + btrfs_wait_logged_extents(trans, log, log_transid); btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); @@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, + btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, &token); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) btrfs_set_token_file_extent_type(leaf, fi, diff --git a/fs/dcache.c b/fs/dcache.c index 71acf8d6f2be..03dca3cad918 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2393,6 +2393,8 @@ static void swap_names(struct dentry *dentry, struct dentry *target) */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN); + kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 2f6735dbf1a9..31b148f3e772 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, break; case 2: dst[dst_byte_offset++] |= (src_byte); - dst[dst_byte_offset] = 0; current_bit_offset = 0; break; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index f5bce9096555..54742f9a67a8 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file) { int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; - mount_crypt_stat = &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; - if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR) - || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC) - || (file->f_flags & O_APPEND))) { - printk(KERN_WARNING "Mount has encrypted view enabled; " - "files may only be read\n"); - rc = -EPERM; - goto out; - } /* Released in ecryptfs_release or end of function if failure */ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index c4cd1fd86cc2..d9eb84bda559 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags { struct super_block *s; struct ecryptfs_sb_info *sbi; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct ecryptfs_dentry_info *root_info; const char *err = "Getting sb failed"; struct inode *inode; @@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags err = "Error parsing options"; goto out; } + mount_crypt_stat = &sbi->mount_crypt_stat; s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { @@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /** * Set the POSIX ACL flag based on whether they're enabled in the lower - * mount. Force a read-only eCryptfs mount if the lower mount is ro. - * Allow a ro eCryptfs mount even when the lower mount is rw. + * mount. */ s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + + /** + * Force a read-only eCryptfs mount when: + * 1) The lower mount is ro + * 2) The ecryptfs_encrypted_view mount option is specified + */ + if (path.dentry->d_sb->s_flags & MS_RDONLY || + mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + s->s_flags |= MS_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4cc2cb9..f988b01b6f89 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1007,21 +1007,19 @@ inline_data: goto out; } - if (dn.data_blkaddr == NEW_ADDR) { + if (f2fs_has_inline_data(inode)) { + err = f2fs_read_inline_data(inode, page); + if (err) { + page_cache_release(page); + goto fail; + } + } else if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76fdc46..3c31221affe6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1004,6 +1004,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1015,7 +1016,6 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ write_checkpoint(sbi, &cpc); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index f488bbae541a..735d7522a3a9 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -30,6 +30,7 @@ struct rock_state { int cont_size; int cont_extent; int cont_offset; + int cont_loops; struct inode *inode; }; @@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode) rs->inode = inode; } +/* Maximum number of Rock Ridge continuation entries */ +#define RR_MAX_CE_ENTRIES 32 + /* * Returns 0 if the caller should continue scanning, 1 if the scan must end * and -ve on error. @@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs) goto out; } ret = -EIO; + if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) + goto out; bh = sb_bread(rs->inode->i_sb, rs->cont_extent); if (bh) { memcpy(rs->buffer, bh->b_data + rs->cont_offset, @@ -356,6 +362,9 @@ repeat: rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): + /* Invalid length of ER tag id? */ + if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) + goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { diff --git a/fs/namespace.c b/fs/namespace.c index 5b66b2b3624d..bbde14719655 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1369,6 +1369,8 @@ void umount_tree(struct mount *mnt, int how) } if (last) { last->mnt_hash.next = unmounted.first; + if (unmounted.first) + unmounted.first->pprev = &last->mnt_hash.next; unmounted.first = tmp_list.first; unmounted.first->pprev = &unmounted.first; } @@ -1544,6 +1546,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: @@ -2098,7 +2103,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, } if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) { - return -EPERM; + /* Was the nodev implicitly added in mount? */ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } } if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) { diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index d5659d96ee7f..cf7e043a9447 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -447,7 +447,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg result = -EIO; } } - result = 0; } mutex_unlock(&server->root_setup_lock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69dc20a743f9..83f3a7d7466e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7704,6 +7704,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7716,9 +7719,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 772efa45a452..7dc3ea89ef1a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static int proc_setgroups_open(struct inode *inode, struct file *file) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + int ret; + + ret = -ESRCH; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + if (file->f_mode & FMODE_WRITE) { + ret = -EACCES; + if (!ns_capable(ns, CAP_SYS_ADMIN)) + goto err_put_ns; + } + + ret = single_open(file, &proc_setgroups_show, ns); + if (ret) + goto err_put_ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_setgroups_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + int ret = single_release(inode, file); + put_user_ns(ns); + return ret; +} + +static const struct file_operations proc_setgroups_operations = { + .open = proc_setgroups_open, + .write = proc_setgroups_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_setgroups_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), @@ -2913,6 +2965,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif }; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index a012c51caffd..a7690b46ce0a 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -167,7 +167,8 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) continue; } - flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + flen = udf_get_filename(dir->i_sb, nameptr, lfi, fname, + UDF_NAME_LEN); if (!flen) continue; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index c9b4df5810d5..5bc71d9a674a 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1489,6 +1489,20 @@ reread: } inode->i_generation = iinfo->i_unique; + /* Sanity checks for files in ICB so that we don't get confused later */ + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + /* + * For file in ICB data is stored in allocation descriptor + * so sizes should match + */ + if (iinfo->i_lenAlloc != inode->i_size) + goto out; + /* File in ICB has to fit in there... */ + if (inode->i_size > inode->i_sb->s_blocksize - + udf_file_entry_alloc_offset(inode)) + goto out; + } + switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: inode->i_op = &udf_dir_inode_operations; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c12e260fd6c4..6ff19b54b51f 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -233,7 +233,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, if (!lfi) continue; - flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + flen = udf_get_filename(dir->i_sb, nameptr, lfi, fname, + UDF_NAME_LEN); if (flen && udf_match(flen, fname, child->len, child->name)) goto out_ok; } diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 6fb7945c1e6e..ac10ca939f26 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -30,49 +30,73 @@ #include #include "udf_i.h" -static void udf_pc_to_char(struct super_block *sb, unsigned char *from, - int fromlen, unsigned char *to) +static int udf_pc_to_char(struct super_block *sb, unsigned char *from, + int fromlen, unsigned char *to, int tolen) { struct pathComponent *pc; int elen = 0; + int comp_len; unsigned char *p = to; + /* Reserve one byte for terminating \0 */ + tolen--; while (elen < fromlen) { pc = (struct pathComponent *)(from + elen); + elen += sizeof(struct pathComponent); switch (pc->componentType) { case 1: /* * Symlink points to some place which should be agreed * upon between originator and receiver of the media. Ignore. */ - if (pc->lengthComponentIdent > 0) + if (pc->lengthComponentIdent > 0) { + elen += pc->lengthComponentIdent; break; + } /* Fall through */ case 2: + if (tolen == 0) + return -ENAMETOOLONG; p = to; *p++ = '/'; + tolen--; break; case 3: + if (tolen < 3) + return -ENAMETOOLONG; memcpy(p, "../", 3); p += 3; + tolen -= 3; break; case 4: + if (tolen < 2) + return -ENAMETOOLONG; memcpy(p, "./", 2); p += 2; + tolen -= 2; /* that would be . - just ignore */ break; case 5: - p += udf_get_filename(sb, pc->componentIdent, p, - pc->lengthComponentIdent); + elen += pc->lengthComponentIdent; + if (elen > fromlen) + return -EIO; + comp_len = udf_get_filename(sb, pc->componentIdent, + pc->lengthComponentIdent, + p, tolen); + p += comp_len; + tolen -= comp_len; + if (tolen == 0) + return -ENAMETOOLONG; *p++ = '/'; + tolen--; break; } - elen += sizeof(struct pathComponent) + pc->lengthComponentIdent; } if (p > to + 1) p[-1] = '\0'; else p[0] = '\0'; + return 0; } static int udf_symlink_filler(struct file *file, struct page *page) @@ -80,11 +104,17 @@ static int udf_symlink_filler(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; - int err = -EIO; + int err; unsigned char *p = kmap(page); struct udf_inode_info *iinfo; uint32_t pos; + /* We don't support symlinks longer than one block */ + if (inode->i_size > inode->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto out_unmap; + } + iinfo = UDF_I(inode); pos = udf_block_map(inode, 0); @@ -94,14 +124,18 @@ static int udf_symlink_filler(struct file *file, struct page *page) } else { bh = sb_bread(inode->i_sb, pos); - if (!bh) - goto out; + if (!bh) { + err = -EIO; + goto out_unlock_inode; + } symlink = bh->b_data; } - udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); + err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE); brelse(bh); + if (err) + goto out_unlock_inode; up_read(&iinfo->i_data_sem); SetPageUptodate(page); @@ -109,9 +143,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) unlock_page(page); return 0; -out: +out_unlock_inode: up_read(&iinfo->i_data_sem); SetPageError(page); +out_unmap: kunmap(page); unlock_page(page); return err; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 1cc3c993ebd0..47bb3f5ca360 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -211,7 +211,8 @@ udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc, } /* unicode.c */ -extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); +extern int udf_get_filename(struct super_block *, uint8_t *, int, uint8_t *, + int); extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, int); extern int udf_build_ustr(struct ustr *, dstring *, int); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index afd470e588ff..b84fee372734 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,7 +28,8 @@ #include "udf_sb.h" -static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int); +static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *, + int); static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) { @@ -333,8 +334,8 @@ try_again: return u_len + 1; } -int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, - int flen) +int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, + uint8_t *dname, int dlen) { struct ustr *filename, *unifilename; int len = 0; @@ -347,7 +348,7 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, if (!unifilename) goto out1; - if (udf_build_ustr_exact(unifilename, sname, flen)) + if (udf_build_ustr_exact(unifilename, sname, slen)) goto out2; if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { @@ -366,7 +367,8 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, } else goto out2; - len = udf_translate_to_linux(dname, filename->u_name, filename->u_len, + len = udf_translate_to_linux(dname, dlen, + filename->u_name, filename->u_len, unifilename->u_name, unifilename->u_len); out2: kfree(unifilename); @@ -403,10 +405,12 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, #define EXT_MARK '.' #define CRC_MARK '#' #define EXT_SIZE 5 +/* Number of chars we need to store generated CRC to make filename unique */ +#define CRC_LEN 5 -static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, - int udfLen, uint8_t *fidName, - int fidNameLen) +static int udf_translate_to_linux(uint8_t *newName, int newLen, + uint8_t *udfName, int udfLen, + uint8_t *fidName, int fidNameLen) { int index, newIndex = 0, needsCRC = 0; int extIndex = 0, newExtIndex = 0, hasExt = 0; @@ -439,7 +443,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, newExtIndex = newIndex; } } - if (newIndex < 256) + if (newIndex < newLen) newName[newIndex++] = curr; else needsCRC = 1; @@ -467,13 +471,13 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, } ext[localExtIndex++] = curr; } - maxFilenameLen = 250 - localExtIndex; + maxFilenameLen = newLen - CRC_LEN - localExtIndex; if (newIndex > maxFilenameLen) newIndex = maxFilenameLen; else newIndex = newExtIndex; - } else if (newIndex > 250) - newIndex = 250; + } else if (newIndex > newLen - CRC_LEN) + newIndex = newLen - CRC_LEN; newName[newIndex++] = CRC_MARK; valueCRC = crc_itu_t(0, fidName, fidNameLen); newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8); diff --git a/include/linux/audit.h b/include/linux/audit.h index e58fe7df8b9c..10f155b7daf6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -47,6 +47,7 @@ struct sk_buff; struct audit_krule { int vers_ops; + u32 pflags; u32 flags; u32 listnr; u32 action; @@ -64,6 +65,9 @@ struct audit_krule { u64 prio; }; +/* Flag to indicate legacy AUDIT_LOGINUID unset usage */ +#define AUDIT_LOGINUID_LEGACY 0x1 + struct audit_field { u32 type; union { diff --git a/include/linux/cred.h b/include/linux/cred.h index b2d0820837c4..2fb2ca2127ed 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); +extern bool may_setgroups(void); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index e95372654f09..9f3579ff543d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ } extent[UID_GID_MAP_MAX_EXTENTS]; }; +#define USERNS_SETGROUPS_ALLOWED 1UL + +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -27,6 +31,7 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + unsigned long flags; /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -63,6 +68,9 @@ extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); +extern int proc_setgroups_show(struct seq_file *m, void *v); +extern bool userns_may_setgroups(const struct user_namespace *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -87,6 +95,10 @@ static inline void put_user_ns(struct user_namespace *ns) { } +static inline bool userns_may_setgroups(const struct user_namespace *ns) +{ + return true; +} #endif #endif /* _LINUX_USER_H */ diff --git a/kernel/audit.c b/kernel/audit.c index cebb11db4d34..c6df9905f1c6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -429,7 +429,7 @@ static void kauditd_send_skb(struct sk_buff *skb) * This function doesn't consume an skb as might be expected since it has to * copy it anyways. */ -static void kauditd_send_multicast_skb(struct sk_buff *skb) +static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *copy; struct audit_net *aunet = net_generic(&init_net, audit_net_id); @@ -448,11 +448,11 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb) * no reason for new multicast clients to continue with this * non-compliance. */ - copy = skb_copy(skb, GFP_KERNEL); + copy = skb_copy(skb, gfp_mask); if (!copy) return; - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); } /* @@ -1949,7 +1949,7 @@ void audit_log_end(struct audit_buffer *ab) struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); nlh->nlmsg_len = ab->skb->len; - kauditd_send_multicast_skb(ab->skb); + kauditd_send_multicast_skb(ab->skb, ab->gfp_mask); /* * The original kaudit unicast socket sends up messages with diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 3598e13f2a65..4f68a326d92e 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -442,19 +442,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; - } - - if ((f->type == AUDIT_PID) || (f->type == AUDIT_PPID)) { - struct pid *pid; - rcu_read_lock(); - pid = find_vpid(f->val); - if (!pid) { - rcu_read_unlock(); - err = -ESRCH; - goto exit_free; - } - f->val = pid_nr(pid); - rcu_read_unlock(); + entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } err = audit_field_valid(entry, f); @@ -630,6 +618,13 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->buflen += data->values[i] = audit_pack_string(&bufp, krule->filterkey); break; + case AUDIT_LOGINUID_SET: + if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) { + data->fields[i] = AUDIT_LOGINUID; + data->values[i] = AUDIT_UID_UNSET; + break; + } + /* fallthrough if set */ default: data->values[i] = f->val; } @@ -646,6 +641,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) int i; if (a->flags != b->flags || + a->pflags != b->pflags || a->listnr != b->listnr || a->action != b->action || a->field_count != b->field_count) @@ -764,6 +760,7 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) new = &entry->rule; new->vers_ops = old->vers_ops; new->flags = old->flags; + new->pflags = old->pflags; new->listnr = old->listnr; new->action = old->action; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) diff --git a/kernel/groups.c b/kernel/groups.c index 451698f86cfa..664411f171b5 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /* init to 2 - one for init_task, one to ensure it is never freed */ @@ -213,6 +214,14 @@ out: return i; } +bool may_setgroups(void) +{ + struct user_namespace *user_ns = current_user_ns(); + + return ns_capable(user_ns, CAP_SETGID) && + userns_may_setgroups(user_ns); +} + /* * SMP: Our groups are copy-on-write. We can set them safely * without another task interfering. @@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/pid.c b/kernel/pid.c index 9b9a26698144..82430c858d69 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -341,6 +341,8 @@ out: out_unlock: spin_unlock_irq(&pidmap_lock); + put_pid_ns(ns); + out_free: while (++i <= ns->level) free_pidmap(pid->numbers + i); diff --git a/kernel/uid16.c b/kernel/uid16.c index 602e5bbbceff..d58cc4d8f0d1 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/user.c b/kernel/user.c index 4efa39350e44..2d09940c9632 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,7 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .flags = USERNS_INIT_FLAGS, #ifdef CONFIG_PERSISTENT_KEYRINGS .persistent_keyring_register_sem = __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index aa312b0dc3ec..a2e37c5d2f63 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -99,6 +100,11 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -583,9 +589,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -602,7 +605,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -620,7 +623,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -750,7 +753,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -812,16 +815,21 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + const struct cred *cred = file->f_cred; + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. + */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, cred->euid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) return true; } } @@ -841,6 +849,100 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. + */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. + */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&userns_state_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); + mutex_unlock(&userns_state_mutex); + + return allowed; +} + static void *userns_get(struct task_struct *task) { struct user_namespace *user_ns; diff --git a/mm/cma.c b/mm/cma.c index fde706e1284f..8e9ec13d31db 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -215,9 +215,21 @@ int __init cma_declare_contiguous(phys_addr_t base, bool fixed, struct cma **res_cma) { phys_addr_t memblock_end = memblock_end_of_DRAM(); - phys_addr_t highmem_start = __pa(high_memory); + phys_addr_t highmem_start; int ret = 0; +#ifdef CONFIG_X86 + /* + * high_memory isn't direct mapped memory so retrieving its physical + * address isn't appropriate. But it would be useful to check the + * physical address of the highmem boundary so it's justfiable to get + * the physical address from it. On x86 there is a validation check for + * this case, so the following workaround is needed to avoid it. + */ + highmem_start = __pa_nodebug(high_memory); +#else + highmem_start = __pa(high_memory); +#endif pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 4c74e8da64b9..5ce13a76d0fd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -929,6 +929,21 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) } } +static void +ieee80211_vif_update_chandef(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *vlan; + + sdata->vif.bss_conf.chandef = *chandef; + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + vlan->vif.bss_conf.chandef = *chandef; +} + static int ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) { @@ -991,7 +1006,7 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width) changed = BSS_CHANGED_BANDWIDTH; - sdata->vif.bss_conf.chandef = sdata->reserved_chandef; + ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef); if (changed) ieee80211_bss_info_change_notify(sdata, changed); @@ -1333,7 +1348,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) sdata->reserved_chandef.width) changed = BSS_CHANGED_BANDWIDTH; - sdata->vif.bss_conf.chandef = sdata->reserved_chandef; + ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef); if (changed) ieee80211_bss_info_change_notify(sdata, changed); @@ -1504,7 +1519,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, goto out; } - sdata->vif.bss_conf.chandef = *chandef; + ieee80211_vif_update_chandef(sdata, chandef); ret = ieee80211_assign_vif_chanctx(sdata, ctx); if (ret) { @@ -1646,7 +1661,7 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, break; } - sdata->vif.bss_conf.chandef = *chandef; + ieee80211_vif_update_chandef(sdata, chandef); ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 653f5eb07a27..eeae0abd01de 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -511,6 +511,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->vif.cab_queue = master->vif.cab_queue; memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); + sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; break; } case NL80211_IFTYPE_AP: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4712150dc210..d66c6443164c 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -647,7 +647,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, int i; mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) { key = key_mtx_dereference(local, sta->gtk[i]); if (!key) continue; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 93af0f1c9d99..da1f639ecfb6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -174,6 +174,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!(ht_cap->cap_info & cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) { ret = IEEE80211_STA_DISABLE_40MHZ; + vht_chandef = *chandef; goto out; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a37f9af634cb..e60da9a062c2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1678,14 +1678,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) - goto out; - if (is_multicast_ether_addr(hdr->addr1)) { rx->local->dot11MulticastReceivedFrameCount++; - goto out; + goto out_no_led; } + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1776,9 +1776,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) status->rx_flags |= IEEE80211_RX_FRAGMENTED; out: + ieee80211_led_rx(rx->local); + out_no_led: if (rx->sta) rx->sta->rx_packets++; - ieee80211_led_rx(rx->local); return RX_CONTINUE; } diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index db9675db1026..7bed4ad7cd76 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -1017,10 +1017,13 @@ static int __init init_encrypted(void) ret = encrypted_shash_alloc(); if (ret < 0) return ret; + ret = aes_get_sizes(); + if (ret < 0) + goto out; ret = register_key_type(&key_type_encrypted); if (ret < 0) goto out; - return aes_get_sizes(); + return 0; out: encrypted_shash_release(); return ret; diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index 1b3ff2fda4d0..517785052f1c 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -32,11 +34,14 @@ # define CLONE_NEWPID 0x20000000 #endif +#ifndef MS_REC +# define MS_REC 16384 +#endif #ifndef MS_RELATIME -#define MS_RELATIME (1 << 21) +# define MS_RELATIME (1 << 21) #endif #ifndef MS_STRICTATIME -#define MS_STRICTATIME (1 << 24) +# define MS_STRICTATIME (1 << 24) #endif static void die(char *fmt, ...) @@ -48,17 +53,14 @@ static void die(char *fmt, ...) exit(EXIT_FAILURE); } -static void write_file(char *filename, char *fmt, ...) +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) { char buf[4096]; int fd; ssize_t written; int buf_len; - va_list ap; - va_start(ap, fmt); buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); if (buf_len < 0) { die("vsnprintf failed: %s\n", strerror(errno)); @@ -69,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...) fd = open(filename, O_WRONLY); if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; die("open of %s failed: %s\n", filename, strerror(errno)); } @@ -87,6 +91,65 @@ static void write_file(char *filename, char *fmt, ...) } } +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); + +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); + +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) { + die("statvfs of %s failed: %s\n", + path, strerror(errno)); + } + if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \ + ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \ + ST_SYNCHRONOUS | ST_MANDLOCK)) { + die("Unrecognized mount flags\n"); + } + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + static void create_and_enter_userns(void) { uid_t uid; @@ -100,13 +163,10 @@ static void create_and_enter_userns(void) strerror(errno)); } + maybe_write_file("/proc/self/setgroups", "deny"); write_file("/proc/self/uid_map", "0 %d 1", uid); write_file("/proc/self/gid_map", "0 %d 1", gid); - if (setgroups(0, NULL) != 0) { - die("setgroups failed: %s\n", - strerror(errno)); - } if (setgid(0) != 0) { die ("setgid(0) failed %s\n", strerror(errno)); @@ -118,7 +178,8 @@ static void create_and_enter_userns(void) } static -bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) +bool test_unpriv_remount(const char *fstype, const char *mount_options, + int mount_flags, int remount_flags, int invalid_flags) { pid_t child; @@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) strerror(errno)); } - if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { - die("mount of /tmp failed: %s\n", - strerror(errno)); + if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) { + die("mount of %s with options '%s' on /tmp failed: %s\n", + fstype, + mount_options? mount_options : "", + strerror(errno)); } create_and_enter_userns(); @@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) static bool test_unpriv_remount_simple(int mount_flags) { - return test_unpriv_remount(mount_flags, mount_flags, 0); + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0); } static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) { - return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, + invalid_flags); +} + +static bool test_priv_mount_unpriv_remount(void) +{ + pid_t child; + int ret; + const char *orig_path = "/dev"; + const char *dest_path = "/tmp"; + int orig_mnt_flags, remount_mnt_flags; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + orig_mnt_flags = read_mnt_flags(orig_path); + + create_and_enter_userns(); + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL); + if (ret != 0) { + die("recursive bind mount of %s onto %s failed: %s\n", + orig_path, dest_path, strerror(errno)); + } + + ret = mount(dest_path, dest_path, "none", + MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL); + if (ret != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + remount_mnt_flags = read_mnt_flags(dest_path); + if (orig_mnt_flags != remount_mnt_flags) { + die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n", + dest_path, orig_path); + } + exit(EXIT_SUCCESS); } int main(int argc, char **argv) { - if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_RDONLY)) { die("MS_RDONLY malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NODEV)) { + if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) { die("MS_NODEV malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_NOSUID)) { die("MS_NOSUID malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_NOEXEC)) { die("MS_NOEXEC malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_RELATIME, + MS_NOATIME)) { die("MS_RELATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_STRICTATIME, + MS_NOATIME)) { die("MS_STRICTATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, - MS_STRICTATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_NOATIME, + MS_STRICTATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_NOATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME, + MS_NOATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_RELATIME|MS_NODIRATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME, + MS_NOATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, - MS_STRICTATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME, + MS_STRICTATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_NOATIME|MS_DIRATIME malfunctions\n"); } - if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) { die("Default atime malfunctions\n"); } + if (!test_priv_mount_unpriv_remount()) { + die("Mount flags unexpectedly changed after remount\n"); + } return EXIT_SUCCESS; }