diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt new file mode 100644 index 000000000000..ea45dd3901e3 --- /dev/null +++ b/Documentation/lzo.txt @@ -0,0 +1,164 @@ + +LZO stream format as understood by Linux's LZO decompressor +=========================================================== + +Introduction + + This is not a specification. No specification seems to be publicly available + for the LZO stream format. This document describes what input format the LZO + decompressor as implemented in the Linux kernel understands. The file subject + of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on + the compressor nor on any other implementations though it seems likely that + the format matches the standard one. The purpose of this document is to + better understand what the code does in order to propose more efficient fixes + for future bug reports. + +Description + + The stream is composed of a series of instructions, operands, and data. The + instructions consist in a few bits representing an opcode, and bits forming + the operands for the instruction, whose size and position depend on the + opcode and on the number of literals copied by previous instruction. The + operands are used to indicate : + + - a distance when copying data from the dictionary (past output buffer) + - a length (number of bytes to copy from dictionary) + - the number of literals to copy, which is retained in variable "state" + as a piece of information for next instructions. + + Optionally depending on the opcode and operands, extra data may follow. These + extra data can be a complement for the operand (eg: a length or a distance + encoded on larger values), or a literal to be copied to the output buffer. + + The first byte of the block follows a different encoding from other bytes, it + seems to be optimized for literal use only, since there is no dictionary yet + prior to that byte. + + Lengths are always encoded on a variable size starting with a small number + of bits in the operand. If the number of bits isn't enough to represent the + length, up to 255 may be added in increments by consuming more bytes with a + rate of at most 255 per extra byte (thus the compression ratio cannot exceed + around 255:1). The variable length encoding using #bits is always the same : + + length = byte & ((1 << #bits) - 1) + if (!length) { + length = ((1 << #bits) - 1) + length += 255*(number of zero bytes) + length += first-non-zero-byte + } + length += constant (generally 2 or 3) + + For references to the dictionary, distances are relative to the output + pointer. Distances are encoded using very few bits belonging to certain + ranges, resulting in multiple copy instructions using different encodings. + Certain encodings involve one extra byte, others involve two extra bytes + forming a little-endian 16-bit quantity (marked LE16 below). + + After any instruction except the large literal copy, 0, 1, 2 or 3 literals + are copied before starting the next instruction. The number of literals that + were copied may change the meaning and behaviour of the next instruction. In + practice, only one instruction needs to know whether 0, less than 4, or more + literals were copied. This is the information stored in the variable + in this implementation. This number of immediate literals to be copied is + generally encoded in the last two bits of the instruction but may also be + taken from the last two bits of an extra operand (eg: distance). + + End of stream is declared when a block copy of distance 0 is seen. Only one + instruction may encode this distance (0001HLLL), it takes one LE16 operand + for the distance, thus requiring 3 bytes. + + IMPORTANT NOTE : in the code some length checks are missing because certain + instructions are called under the assumption that a certain number of bytes + follow because it has already been garanteed before parsing the instructions. + They just have to "refill" this credit if they consume extra bytes. This is + an implementation design choice independant on the algorithm or encoding. + +Byte sequences + + First byte encoding : + + 0..17 : follow regular instruction encoding, see below. It is worth + noting that codes 16 and 17 will represent a block copy from + the dictionary which is empty, and that they will always be + invalid at this place. + + 18..21 : copy 0..3 literals + state = (byte - 17) = 0..3 [ copy literals ] + skip byte + + 22..255 : copy literal string + length = (byte - 17) = 4..238 + state = 4 [ don't copy extra literals ] + skip byte + + Instruction encoding : + + 0 0 0 0 X X X X (0..15) + Depends on the number of literals copied by the last instruction. + If last instruction did not copy any literal (state == 0), this + encoding will be a copy of 4 or more literal, and must be interpreted + like this : + + 0 0 0 0 L L L L (0..15) : copy long literal string + length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte) + state = 4 (no extra literals are copied) + + If last instruction used to copy between 1 to 3 literals (encoded in + the instruction's opcode or distance), the instruction is a copy of a + 2-byte block from the dictionary within a 1kB distance. It is worth + noting that this instruction provides little savings since it uses 2 + bytes to encode a copy of 2 other bytes but it encodes the number of + following literals for free. It must be interpreted like this : + + 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance + length = 2 + state = S (copy S literals after this block) + Always followed by exactly one byte : H H H H H H H H + distance = (H << 2) + D + 1 + + If last instruction used to copy 4 or more literals (as detected by + state == 4), the instruction becomes a copy of a 3-byte block from the + dictionary from a 2..3kB distance, and must be interpreted like this : + + 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance + length = 3 + state = S (copy S literals after this block) + Always followed by exactly one byte : H H H H H H H H + distance = (H << 2) + D + 2049 + + 0 0 0 1 H L L L (16..31) + Copy of a block within 16..48kB distance (preferably less than 10B) + length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte) + Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S + distance = 16384 + (H << 14) + D + state = S (copy S literals after this block) + End of stream is reached if distance == 16384 + + 0 0 1 L L L L L (32..63) + Copy of small block within 16kB distance (preferably less than 34B) + length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte) + Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S + distance = D + 1 + state = S (copy S literals after this block) + + 0 1 L D D D S S (64..127) + Copy 3-4 bytes from block within 2kB distance + state = S (copy S literals after this block) + length = 3 + L + Always followed by exactly one byte : H H H H H H H H + distance = (H << 3) + D + 1 + + 1 L L D D D S S (128..255) + Copy 5-8 bytes from block within 2kB distance + state = S (copy S literals after this block) + length = 5 + L + Always followed by exactly one byte : H H H H H H H H + distance = (H << 3) + D + 1 + +Authors + + This document was written by Willy Tarreau on 2014/07/19 during an + analysis of the decompression code available in Linux 3.16-rc5. The code is + tricky, it is possible that this document contains mistakes or that a few + corner cases were overlooked. In any case, please report any doubt, fix, or + proposed updates to the author(s) so that the document can be updated. diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 290894176142..53838d9c6295 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -425,6 +425,20 @@ fault through the slow path. Since only 19 bits are used to store generation-number on mmio spte, all pages are zapped when there is an overflow. +Unfortunately, a single memory access might access kvm_memslots(kvm) multiple +times, the last one happening when the generation number is retrieved and +stored into the MMIO spte. Thus, the MMIO spte might be created based on +out-of-date information, but with an up-to-date generation number. + +To avoid this, the generation number is incremented again after synchronize_srcu +returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a +memslot update, while some SRCU readers might be using the old copy. We do not +want to use an MMIO sptes created with an odd generation number, and we can do +this without losing a bit in the MMIO spte. The low bit of the generation +is not stored in MMIO spte, and presumed zero when it is extracted out of the +spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1, +the next access to the spte will always be a cache miss. + Further reading =============== diff --git a/Makefile b/Makefile index 46694098725d..390afde6538e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 17 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Shuffling Zombie Juror diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index b8c5cd3ddeb9..e6aa6e77a3ec 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -144,8 +144,8 @@ dtb-$(CONFIG_MACH_KIRKWOOD) += kirkwood-b3.dtb \ kirkwood-openrd-client.dtb \ kirkwood-openrd-ultimate.dtb \ kirkwood-rd88f6192.dtb \ - kirkwood-rd88f6281-a0.dtb \ - kirkwood-rd88f6281-a1.dtb \ + kirkwood-rd88f6281-z0.dtb \ + kirkwood-rd88f6281-a.dtb \ kirkwood-rs212.dtb \ kirkwood-rs409.dtb \ kirkwood-rs411.dtb \ diff --git a/arch/arm/boot/dts/armada-370-netgear-rn102.dts b/arch/arm/boot/dts/armada-370-netgear-rn102.dts index d6d572e5af32..285524fb915e 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts @@ -143,6 +143,10 @@ marvell,nand-enable-arbiter; nand-on-flash-bbt; + /* Use Hardware BCH ECC */ + nand-ecc-strength = <4>; + nand-ecc-step-size = <512>; + partition@0 { label = "u-boot"; reg = <0x0000000 0x180000>; /* 1.5MB */ diff --git a/arch/arm/boot/dts/armada-370-netgear-rn104.dts b/arch/arm/boot/dts/armada-370-netgear-rn104.dts index c5fe8b5dcdc7..4ec1ce561d34 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn104.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn104.dts @@ -145,6 +145,10 @@ marvell,nand-enable-arbiter; nand-on-flash-bbt; + /* Use Hardware BCH ECC */ + nand-ecc-strength = <4>; + nand-ecc-step-size = <512>; + partition@0 { label = "u-boot"; reg = <0x0000000 0x180000>; /* 1.5MB */ diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts index 0cf999abc4ed..c5ed85a70ed9 100644 --- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts +++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts @@ -223,6 +223,10 @@ marvell,nand-enable-arbiter; nand-on-flash-bbt; + /* Use Hardware BCH ECC */ + nand-ecc-strength = <4>; + nand-ecc-step-size = <512>; + partition@0 { label = "u-boot"; reg = <0x0000000 0x180000>; /* 1.5MB */ diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index bb23c2d33cf8..5e95a8053445 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -834,6 +834,7 @@ compatible = "atmel,hsmci"; reg = <0xfff80000 0x600>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; clocks = <&mci0_clk>; @@ -845,6 +846,7 @@ compatible = "atmel,hsmci"; reg = <0xfff84000 0x600>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; clocks = <&mci1_clk>; diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts index e4cc44c98585..41a983405e7d 100644 --- a/arch/arm/boot/dts/imx28-evk.dts +++ b/arch/arm/boot/dts/imx28-evk.dts @@ -193,7 +193,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; sgtl5000: codec@0a { diff --git a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts index 8f76d28759a3..f82827d6fcff 100644 --- a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts +++ b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts @@ -123,11 +123,11 @@ dsa@0 { compatible = "marvell,dsa"; - #address-cells = <2>; + #address-cells = <1>; #size-cells = <0>; - dsa,ethernet = <ð0>; - dsa,mii-bus = <ðphy0>; + dsa,ethernet = <ð0port>; + dsa,mii-bus = <&mdio>; switch@0 { #address-cells = <1>; @@ -169,17 +169,13 @@ &mdio { status = "okay"; - - ethphy0: ethernet-phy@ff { - reg = <0xff>; /* No phy attached */ - speed = <1000>; - duplex = <1>; - }; }; ð0 { status = "okay"; + ethernet0-port@0 { - phy-handle = <ðphy0>; + speed = <1000>; + duplex = <1>; }; }; diff --git a/arch/arm/boot/dts/kirkwood-rd88f6281-a.dts b/arch/arm/boot/dts/kirkwood-rd88f6281-a.dts new file mode 100644 index 000000000000..f2e08b3b33ea --- /dev/null +++ b/arch/arm/boot/dts/kirkwood-rd88f6281-a.dts @@ -0,0 +1,43 @@ +/* + * Marvell RD88F6181 A Board descrition + * + * Andrew Lunn + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * This file contains the definitions for the board with the A0 or + * higher stepping of the SoC. The ethernet switch does not have a + * "wan" port. + */ + +/dts-v1/; +#include "kirkwood-rd88f6281.dtsi" + +/ { + model = "Marvell RD88f6281 Reference design, with A0 or higher SoC"; + compatible = "marvell,rd88f6281-a", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood"; + + dsa@0 { + switch@0 { + reg = <10 0>; /* MDIO address 10, switch 0 in tree */ + }; + }; +}; + +&mdio { + status = "okay"; + + ethphy1: ethernet-phy@11 { + reg = <11>; + }; +}; + +ð1 { + status = "okay"; + + ethernet1-port@0 { + phy-handle = <ðphy1>; + }; +}; diff --git a/arch/arm/boot/dts/kirkwood-rd88f6281-a0.dts b/arch/arm/boot/dts/kirkwood-rd88f6281-a0.dts deleted file mode 100644 index a803bbb70bc8..000000000000 --- a/arch/arm/boot/dts/kirkwood-rd88f6281-a0.dts +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Marvell RD88F6181 A0 Board descrition - * - * Andrew Lunn - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file contains the definitions for the board with the A0 variant of - * the SoC. The ethernet switch does not have a "wan" port. - */ - -/dts-v1/; -#include "kirkwood-rd88f6281.dtsi" - -/ { - model = "Marvell RD88f6281 Reference design, with A0 SoC"; - compatible = "marvell,rd88f6281-a0", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood"; - - dsa@0 { - switch@0 { - reg = <10 0>; /* MDIO address 10, switch 0 in tree */ - }; - }; -}; \ No newline at end of file diff --git a/arch/arm/boot/dts/kirkwood-rd88f6281-a1.dts b/arch/arm/boot/dts/kirkwood-rd88f6281-a1.dts deleted file mode 100644 index baeebbf1d8c7..000000000000 --- a/arch/arm/boot/dts/kirkwood-rd88f6281-a1.dts +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Marvell RD88F6181 A1 Board descrition - * - * Andrew Lunn - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file contains the definitions for the board with the A1 variant of - * the SoC. The ethernet switch has a "wan" port. - */ - -/dts-v1/; - -#include "kirkwood-rd88f6281.dtsi" - -/ { - model = "Marvell RD88f6281 Reference design, with A1 SoC"; - compatible = "marvell,rd88f6281-a1", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood"; - - dsa@0 { - switch@0 { - reg = <0 0>; /* MDIO address 0, switch 0 in tree */ - port@4 { - reg = <4>; - label = "wan"; - }; - }; - }; -}; \ No newline at end of file diff --git a/arch/arm/boot/dts/kirkwood-rd88f6281-z0.dts b/arch/arm/boot/dts/kirkwood-rd88f6281-z0.dts new file mode 100644 index 000000000000..f4272b64ed7f --- /dev/null +++ b/arch/arm/boot/dts/kirkwood-rd88f6281-z0.dts @@ -0,0 +1,35 @@ +/* + * Marvell RD88F6181 Z0 stepping descrition + * + * Andrew Lunn + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * This file contains the definitions for the board using the Z0 + * stepping of the SoC. The ethernet switch has a "wan" port. +*/ + +/dts-v1/; + +#include "kirkwood-rd88f6281.dtsi" + +/ { + model = "Marvell RD88f6281 Reference design, with Z0 SoC"; + compatible = "marvell,rd88f6281-z0", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood"; + + dsa@0 { + switch@0 { + reg = <0 0>; /* MDIO address 0, switch 0 in tree */ + port@4 { + reg = <4>; + label = "wan"; + }; + }; + }; +}; + +ð1 { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi b/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi index 26cf0e0ccefd..d195e884b3b5 100644 --- a/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi +++ b/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi @@ -37,7 +37,6 @@ ocp@f1000000 { pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_sdio_cd>; pinctrl-names = "default"; pmx_sdio_cd: pmx-sdio-cd { @@ -69,8 +68,8 @@ #address-cells = <2>; #size-cells = <0>; - dsa,ethernet = <ð0>; - dsa,mii-bus = <ðphy1>; + dsa,ethernet = <ð0port>; + dsa,mii-bus = <&mdio>; switch@0 { #address-cells = <1>; @@ -119,35 +118,19 @@ }; partition@300000 { - label = "data"; + label = "rootfs"; reg = <0x0300000 0x500000>; }; }; &mdio { status = "okay"; - - ethphy0: ethernet-phy@0 { - reg = <0>; - }; - - ethphy1: ethernet-phy@ff { - reg = <0xff>; /* No PHY attached */ - speed = <1000>; - duple = <1>; - }; }; ð0 { status = "okay"; ethernet0-port@0 { - phy-handle = <ðphy0>; - }; -}; - -ð1 { - status = "okay"; - ethernet1-port@0 { - phy-handle = <ðphy1>; + speed = <1000>; + duplex = <1>; }; }; diff --git a/arch/arm/boot/dts/kirkwood.dtsi b/arch/arm/boot/dts/kirkwood.dtsi index afc640cd80c5..464f09a1a4a5 100644 --- a/arch/arm/boot/dts/kirkwood.dtsi +++ b/arch/arm/boot/dts/kirkwood.dtsi @@ -309,7 +309,7 @@ marvell,tx-checksum-limit = <1600>; status = "disabled"; - ethernet0-port@0 { + eth0port: ethernet0-port@0 { compatible = "marvell,kirkwood-eth-port"; reg = <0>; interrupts = <11>; @@ -342,7 +342,7 @@ pinctrl-names = "default"; status = "disabled"; - ethernet1-port@0 { + eth1port: ethernet1-port@0 { compatible = "marvell,kirkwood-eth-port"; reg = <0>; interrupts = <15>; diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index a0775851cce5..eaf41451ad0c 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -40,7 +40,7 @@ atmel,clk-output-range = <0 66000000>; }; - can1_clk: can0_clk { + can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; atmel,clk-output-range = <0 66000000>; diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 034529d801b2..d66f102c352a 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -962,6 +962,7 @@ static int __init at91_clock_reset(void) } at91_pmc_write(AT91_PMC_SCDR, scdr); + at91_pmc_write(AT91_PMC_PCDR, pcdr); if (cpu_is_sama5d3()) at91_pmc_write(AT91_PMC_PCDR1, pcdr1); diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 253e33bc94fb..56de5aadede2 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -37,8 +37,8 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 __compat_uid_t; -typedef u32 __compat_gid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; typedef u16 __compat_uid16_t; typedef u16 __compat_gid16_t; typedef u32 __compat_uid32_t; diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 8e24ef3f7c82..b4f6b19a8a68 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -1,6 +1,8 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H +#ifdef CONFIG_SMP + #include static inline bool arch_irq_work_has_interrupt(void) @@ -8,4 +10,13 @@ static inline bool arch_irq_work_has_interrupt(void) return !!__smp_cross_call; } +#else + +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} + +#endif + #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f0b5e5120a87..726b910fe6ec 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -324,7 +324,6 @@ el1_dbg: mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception - enable_dbg kernel_exit 1 el1_inv: // TODO: add support for undefined instructions in kernel mode diff --git a/arch/m68k/mm/hwtest.c b/arch/m68k/mm/hwtest.c index 2c7dde3c6430..2a5259fd23eb 100644 --- a/arch/m68k/mm/hwtest.c +++ b/arch/m68k/mm/hwtest.c @@ -28,9 +28,11 @@ int hwreg_present( volatile void *regp ) { int ret = 0; + unsigned long flags; long save_sp, save_vbr; long tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr1,%4@(8)\n\t" @@ -46,6 +48,7 @@ int hwreg_present( volatile void *regp ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors) ); + local_irq_restore(flags); return( ret ); } @@ -58,9 +61,11 @@ EXPORT_SYMBOL(hwreg_present); int hwreg_write( volatile void *regp, unsigned short val ) { int ret; + unsigned long flags; long save_sp, save_vbr; long tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr2,%4@(8)\n\t" @@ -78,6 +83,7 @@ int hwreg_write( volatile void *regp, unsigned short val ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors), "g" (val) ); + local_irq_restore(flags); return( ret ); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 00e3844525a6..eef08f0bca73 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && @@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pe->state &= ~state; - /* Clear check count since last isolation */ - if (state & EEH_PE_ISOLATED) - pe->check_count = 0; + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) + return NULL; + + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + pdev->error_state = pci_channel_io_normal; + } return NULL; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a0738af4aba6..dc0e7744d2a8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -379,8 +379,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* * numa_node_id() works after this. */ - set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); - set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); + if (cpu_present(cpu)) { + set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); + set_cpu_numa_mem(cpu, + local_memory_node(numa_cpu_lookup_table[cpu])); + } } cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); @@ -728,6 +731,9 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index d7737a542fd7..3a9061e9f5dd 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1127,9 +1127,8 @@ void __init do_init_bootmem(void) * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). */ - for_each_possible_cpu(cpu) { - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)cpu); + for_each_present_cpu(cpu) { + numa_setup_cpu((unsigned long)cpu); } } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4642d6a4d356..de1ec54a2a57 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -329,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - __be32 windows_available; - __be32 largest_available_block; - __be32 page_size; - __be32 migration_capable; + u32 windows_available; + u32 largest_available_block; + u32 page_size; + u32 migration_capable; }; struct ddw_create_response { - __be32 liobn; - __be32 addr_hi; - __be32 addr_lo; + u32 liobn; + u32 addr_hi; + u32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -725,16 +725,18 @@ static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret = 0; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -872,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -910,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -942,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) goto out_failed; /* @@ -966,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_failed; } - if (be32_to_cpu(query.page_size) & 4) { + if (query.page_size & 4) { page_shift = 24; /* 16MB */ - } else if (be32_to_cpu(query.page_size) & 2) { + } else if (query.page_size & 2) { page_shift = 16; /* 64kB */ - } else if (be32_to_cpu(query.page_size) & 1) { + } else if (query.page_size & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -980,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { + if (query.largest_available_block < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1006,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = create.liobn; - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1039,7 +1044,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f4c819bfc193..fe482ec99bae 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -85,6 +85,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) return 1; + return 0; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index a537816613f9..96ac69c5eba0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -67,6 +67,7 @@ config SPARC64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_KMEMLEAK + select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 94b39caea3eb..4f6725ff4c33 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2947,6 +2947,16 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif +#define HV_FAST_T5_GET_PERFREG 0x1a8 +#define HV_FAST_T5_SET_PERFREG 0x1a9 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2978,6 +2988,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_GRP_VF_CPU 0x0205 #define HV_GRP_KT_CPU 0x0209 #define HV_GRP_VT_CPU 0x020c +#define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 91d219381306..3f70f900e834 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -37,7 +37,7 @@ * * ino_bucket->irq allocation is made during {sun4v_,}build_irq(). */ -#define NR_IRQS 255 +#define NR_IRQS (2048) void irq_install_pre_handler(int irq, void (*func)(unsigned int, void *, void *), @@ -57,11 +57,8 @@ unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p, unsigned long iclr_base); void sun4u_destroy_msi(unsigned int irq); -unsigned char irq_alloc(unsigned int dev_handle, - unsigned int dev_ino); -#ifdef CONFIG_PCI_MSI +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); void irq_free(unsigned int irq); -#endif void __init init_IRQ(void); void fixup_irqs(void); diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h index c8c67f621f4f..58ab64de25d2 100644 --- a/arch/sparc/include/asm/ldc.h +++ b/arch/sparc/include/asm/ldc.h @@ -53,13 +53,14 @@ struct ldc_channel; /* Allocate state for a channel. */ struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg); + void *event_arg, + const char *name); /* Shut down and free state for a channel. */ void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -int ldc_bind(struct ldc_channel *lp, const char *name); +int ldc_bind(struct ldc_channel *lp); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. ldc_connect() does that, if the diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index f34682430fcf..2e3a4add8591 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h @@ -62,7 +62,8 @@ struct linux_mem_p1275 { /* You must call prom_init() before using any of the library services, * preferably as early as possible. Pass it the romvec pointer. */ -void prom_init(void *cif_handler, void *cif_stack); +void prom_init(void *cif_handler); +void prom_init_report(void); /* Boot argument acquisition, returns the boot command line string. */ char *prom_getbootargs(void); diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index bf109984a032..8c2a8c937540 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -57,18 +57,21 @@ void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topa typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long iopte; } iopte_t; typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define iopte_val(x) ((x).iopte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) #define __iopte(x) ((iopte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } pgprot_t; typedef unsigned long pte_t; typedef unsigned long iopte_t; typedef unsigned long pmd_t; +typedef unsigned long pud_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define iopte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __iopte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -96,21 +102,14 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; -/* These two values define the virtual address space range in which we - * must forbid 64-bit user processes from making mappings. It used to - * represent precisely the virtual address space hole present in most - * early sparc64 chips including UltraSPARC-I. But now it also is - * further constrained by the limits of our page tables, which is - * 43-bits of virtual address. - */ -#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL) -#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL) +extern unsigned long sparc64_va_hole_top; +extern unsigned long sparc64_va_hole_bottom; /* The next two defines specify the actual exclusion region we * enforce, wherein we use a 4GB red zone on each side of the VA hole. */ -#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL)) -#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL)) +#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL)) +#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL)) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ _AC(0x0000000070000000,UL) : \ @@ -118,20 +117,16 @@ typedef pte_t *pgtable_t; #include -#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -/* The maximum number of physical memory address bits we support, this - * is used to size various tables used to manage kernel TLB misses and - * also the sparsemem code. +/* The maximum number of physical memory address bits we support. The + * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" + * evaluates to. */ -#define MAX_PHYS_ADDRESS_BITS 47 +#define MAX_PHYS_ADDRESS_BITS 53 -/* These two shift counts are used when indexing sparc64_valid_addr_bitmap - * and kpte_linear_bitmap. - */ #define ILOG2_4MB 22 #define ILOG2_256MB 28 diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 39a7ac49b00c..5e3187185b4a 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -15,6 +15,13 @@ extern struct kmem_cache *pgtable_cache; +static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, pud); +} + +#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); @@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgtable_cache, pgd); } -#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +static inline void __pud_populate(pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(pgtable_cache, pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, false) +#define __pud_free_tlb(tlb, pud, addr) \ + pgtable_free_tlb(tlb, pud, false) + #endif /* _SPARC64_PGALLOC_H */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 3770bf5c6e1b..bfeb626085ac 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -20,8 +20,6 @@ #include #include -#include - /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). * The page copy blockops can use 0x6000000 to 0x8000000. * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. @@ -42,10 +40,7 @@ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000010000000000,UL) -#define VMEMMAP_BASE _AC(0x0000010000000000,UL) - -#define vmemmap ((struct page *)VMEMMAP_BASE) +#define VMEMMAP_BASE VMALLOC_END /* PMD_SHIFT determines the size of the area a second-level page * table can map @@ -55,13 +50,25 @@ #define PMD_MASK (~(PMD_SIZE-1)) #define PMD_BITS (PAGE_SHIFT - 3) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) +/* PUD_SHIFT determines the size of the area a third-level page + * table can map + */ +#define PUD_SHIFT (PMD_SHIFT + PMD_BITS) +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PUD_BITS (PAGE_SHIFT - 3) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_BITS (PAGE_SHIFT - 3) -#if (PGDIR_SHIFT + PGDIR_BITS) != 43 +#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS) +#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support +#endif + +#if (PGDIR_SHIFT + PGDIR_BITS) != 53 #error Page table parameters do not cover virtual address space properly. #endif @@ -71,28 +78,18 @@ #ifndef __ASSEMBLY__ -#include - -extern unsigned long sparc64_valid_addr_bitmap[]; +extern unsigned long VMALLOC_END; -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -static inline bool __kern_addr_valid(unsigned long paddr) -{ - if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL) - return false; - return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap); -} +#define vmemmap ((struct page *)VMEMMAP_BASE) -static inline bool kern_addr_valid(unsigned long addr) -{ - unsigned long paddr = __pa(addr); +#include - return __kern_addr_valid(paddr); -} +bool kern_addr_valid(unsigned long addr); /* Entries per page directory level. */ #define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PMD (1UL << PMD_BITS) +#define PTRS_PER_PUD (1UL << PUD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) /* Kernel has a separate 44bit address space. */ @@ -101,6 +98,9 @@ static inline bool kern_addr_valid(unsigned long addr) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) @@ -112,6 +112,7 @@ static inline bool kern_addr_valid(unsigned long addr) #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ +#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE /* Advertise support for _PAGE_SPECIAL */ #define __HAVE_ARCH_PTE_SPECIAL @@ -658,26 +659,26 @@ static inline unsigned long pmd_large(pmd_t pmd) return pte_val(pte) & _PAGE_PMD_HUGE; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline unsigned long pmd_young(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_young(pte); + return pte_pfn(pte); } -static inline unsigned long pmd_write(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_write(pte); + return pte_young(pte); } -static inline unsigned long pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_pfn(pte); + return pte_write(pte); } static inline unsigned long pmd_trans_huge(pmd_t pmd) @@ -771,13 +772,15 @@ static inline int pmd_present(pmd_t pmd) * the top bits outside of the range of any physical address size we * support are clear as well. We also validate the physical itself. */ -#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \ - !__kern_addr_valid(pmd_val(pmd))) +#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \ - !__kern_addr_valid(pud_val(pud))) +#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) + +#define pgd_none(pgd) (!pgd_val(pgd)) + +#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) #ifdef CONFIG_TRANSPARENT_HUGEPAGE void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -815,10 +818,31 @@ static inline unsigned long __pmd_page(pmd_t pmd) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pgd_page_vaddr(pgd) \ + ((unsigned long) __va(pgd_val(pgd))) +#define pgd_present(pgd) (pgd_val(pgd) != 0U) +#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + +static inline unsigned long pud_large(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_val(pte) & _PAGE_PMD_HUGE; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_pfn(pte); +} /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) +#define pgd_set(pgdp, pudp) \ + (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) + /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) @@ -826,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) +/* Find an entry in the third-level page table.. */ +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgdp, address) \ + ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) + /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page_vaddr(*(pudp)) + \ @@ -898,7 +927,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD]; void paging_init(void); unsigned long find_ecache_flush_span(unsigned long size); diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index f5fffd84d0dd..29d64b1758ed 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -48,6 +48,8 @@ unsigned long safe_compute_effective_address(struct pt_regs *, unsigned int); #endif #ifdef CONFIG_SPARC64 +void __init start_early_boot(void); + /* unaligned_64.c */ int handle_ldf_stq(u32 insn, struct pt_regs *regs); void handle_ld_nf(u32 insn, struct pt_regs *regs); diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 3fc58691dbd0..56f933816144 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,8 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC_M6 0x06 +#define SUN4V_CHIP_SPARC_M7 0x07 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index a5f01ac6d0f1..cc6275c931a5 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -63,7 +63,8 @@ struct thread_info { struct pt_regs *kern_una_regs; unsigned int kern_una_insn; - unsigned long fpregs[0] __attribute__ ((aligned(64))); + unsigned long fpregs[(7 * 256) / sizeof(unsigned long)] + __attribute__ ((aligned(64))); }; #endif /* !(__ASSEMBLY__) */ @@ -102,6 +103,7 @@ struct thread_info { #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ +#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ #if PAGE_SHIFT == 13 #define THREAD_SIZE (2*PAGE_SIZE) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 90916f955cac..ecb49cfa3be9 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); - /* Do a kernel page table walk. Leaves physical PTE pointer in - * REG1. Jumps to FAIL_LABEL on early page table walk termination. - * VADDR will not be clobbered, but REG2 will. + /* Do a kernel page table walk. Leaves valid PTE value in + * REG1. Jumps to FAIL_LABEL on early page table walk + * termination. VADDR will not be clobbered, but REG2 will. + * + * There are two masks we must apply to propagate bits from + * the virtual address into the PTE physical address field + * when dealing with huge pages. This is because the page + * table boundaries do not match the huge page size(s) the + * hardware supports. + * + * In these cases we propagate the bits that are below the + * page table level where we saw the huge page mapping, but + * are still within the relevant physical bits for the huge + * page size in question. So for PMD mappings (which fall on + * bit 23, for 8MB per PMD) we must propagate bit 22 for a + * 4MB huge page. For huge PUDs (which fall on bit 33, for + * 8GB per PUD), we have to accomodate 256MB and 2GB huge + * pages. So for those we propagate bits 32 to 28. */ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ sethi %hi(swapper_pg_dir), REG1; \ @@ -145,15 +160,40 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG2, 0x7, REG2; \ ldx [REG1 + REG2], REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - PMD_SHIFT, REG2; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + sethi %hi(0xf8000000), REG2; \ + bne,pt %xcc, 697f; \ + sllx REG2, 1, REG2; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ - add REG1, REG2, REG1; + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pn %xcc, 698f; \ + sethi %hi(0x400000), REG2; \ +697: brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + ba,pt %xcc, 699f; \ + or REG1, REG2, REG1; \ +698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brgez,pn REG1, FAIL_LABEL; \ + nop; \ +699: /* PMD has been loaded into REG1, interpret the value, seeing * if it is a HUGE PMD or a normal one. If it is not valid @@ -198,6 +238,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG2, 0x7, REG2; \ ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ @@ -246,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 -#define KTSB_PHYS_SHIFT 15 - /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -256,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_tsb), REG1; \ - or REG1, %lo(swapper_tsb), REG1; \ +661: sethi %uhi(swapper_tsb), REG1; \ + sethi %hi(swapper_tsb), REG2; \ + or REG1, %ulo(swapper_tsb), REG1; \ + or REG2, %lo(swapper_tsb), REG2; \ .section .swapper_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ @@ -281,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. */ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_4m_tsb), REG1; \ - or REG1, %lo(swapper_4m_tsb), REG1; \ +661: sethi %uhi(swapper_4m_tsb), REG1; \ + sethi %hi(swapper_4m_tsb), REG2; \ + or REG1, %ulo(swapper_4m_tsb), REG1; \ + or REG2, %lo(swapper_4m_tsb), REG2; \ .section .swapper_4m_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index b26673759283..1f0aa2024e94 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -39,6 +39,14 @@ 297: wr %o5, FPRS_FEF, %fprs; \ 298: +#define VISEntryHalfFast(fail_label) \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,pt %icc, 297f; \ + nop; \ + ba,a,pt %xcc, fail_label; \ +297: wr %o5, FPRS_FEF, %fprs; + #define VISExitHalf \ wr %o5, 0, %fprs; diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 82a3a71c451e..dfad8b1aea9f 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -494,6 +494,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC_M6: + sparc_cpu_type = "SPARC-M6"; + sparc_fpu_type = "SPARC-M6 integrated FPU"; + sparc_pmu_type = "sparc-m6"; + break; + + case SUN4V_CHIP_SPARC_M7: + sparc_cpu_type = "SPARC-M7"; + sparc_fpu_type = "SPARC-M7 integrated FPU"; + sparc_pmu_type = "sparc-m7"; + break; + case SUN4V_CHIP_SPARC64X: sparc_cpu_type = "SPARC64-X"; sparc_fpu_type = "SPARC64-X integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index de1c844dfabc..e69ec0e3f155 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA3: case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC_M6: + case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; break; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index dff60abbea01..f87a55d77094 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) ds_cfg.tx_irq = vdev->tx_irq; ds_cfg.rx_irq = vdev->rx_irq; - lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out_free_ds_states; } dp->lp = lp; - err = ldc_bind(lp, "DS"); + err = ldc_bind(lp); if (err) goto out_free_ldc; diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S index b2c2c5be281c..d668ca149e64 100644 --- a/arch/sparc/kernel/dtlb_prot.S +++ b/arch/sparc/kernel/dtlb_prot.S @@ -24,11 +24,11 @@ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ + ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup - ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 - ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 - nop + ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault + nop nop nop nop diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index ebaba6167dd4..88d322b67fac 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -65,13 +65,10 @@ struct pause_patch_entry { extern struct pause_patch_entry __pause_3insn_patch, __pause_3insn_patch_end; -void __init per_cpu_patch(void); void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); -void __init sun4v_patch(void); -void __init boot_cpu_id_too_large(int cpu); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 452f04fe8da6..3d61fcae7ee3 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -427,6 +427,12 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 + cmp %g2, '6' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M6, %g4 + cmp %g2, '7' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M7, %g4 ba,pt %xcc, 49f nop @@ -585,6 +591,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA5 be,pt %xcc, niagara4_patch nop + cmp %g1, SUN4V_CHIP_SPARC_M6 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop call generic_patch_copyops nop @@ -660,14 +672,12 @@ tlb_fixup_done: sethi %hi(init_thread_union), %g6 or %g6, %lo(init_thread_union), %g6 ldx [%g6 + TI_TASK], %g4 - mov %sp, %l6 wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 add %g6, %g1, %sp - mov 0, %fp /* Set per-cpu pointer initially to zero, this makes * the boot-cpu use the in-kernel-image per-cpu areas @@ -694,44 +704,14 @@ tlb_fixup_done: nop #endif - mov %l6, %o1 ! OpenPROM stack call prom_init mov %l7, %o0 ! OpenPROM cif handler - /* Initialize current_thread_info()->cpu as early as possible. - * In order to do that accurately we have to patch up the get_cpuid() - * assembler sequences. And that, in turn, requires that we know - * if we are on a Starfire box or not. While we're here, patch up - * the sun4v sequences as well. + /* To create a one-register-window buffer between the kernel's + * initial stack and the last stack frame we use from the firmware, + * do the rest of the boot from a C helper function. */ - call check_if_starfire - nop - call per_cpu_patch - nop - call sun4v_patch - nop - -#ifdef CONFIG_SMP - call hard_smp_processor_id - nop - cmp %o0, NR_CPUS - blu,pt %xcc, 1f - nop - call boot_cpu_id_too_large - nop - /* Not reached... */ - -1: -#else - mov 0, %o0 -#endif - sth %o0, [%g6 + TI_CPU] - - call prom_init_report - nop - - /* Off we go.... */ - call start_kernel + call start_early_boot nop /* Not reached... */ diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index c0a2de0fd624..5c55145bfbf0 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -46,6 +46,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_KT_CPU, }, { .group = HV_GRP_VT_CPU, }, + { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index f3ab509b76a8..caedf8320416 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg) retl nop ENDPROC(sun4v_vt_set_perfreg) + +ENTRY(sun4v_t5_get_perfreg) + mov %o1, %o4 + mov HV_FAST_T5_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_t5_get_perfreg) + +ENTRY(sun4v_t5_set_perfreg) + mov HV_FAST_T5_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_t5_set_perfreg) diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S index b7ddcdd1dea9..cdbfec299f2f 100644 --- a/arch/sparc/kernel/hvtramp.S +++ b/arch/sparc/kernel/hvtramp.S @@ -109,7 +109,6 @@ hv_cpu_startup: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp call init_irqwork_curcpu nop diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 7f08ec8a7c68..28fed53b13a0 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0) + va = __get_free_pages(gfp, order); + if (va == 0) goto err_nopages; if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) @@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - va = (void *) __get_free_pages(GFP_KERNEL, order); + va = (void *) __get_free_pages(gfp, order); if (va == NULL) { printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT); goto err_nopages; diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 666193f4e8bb..4033c23bdfa6 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -47,8 +47,6 @@ #include "cpumap.h" #include "kstack.h" -#define NUM_IVECS (IMAP_INR + 1) - struct ino_bucket *ivector_table; unsigned long ivector_table_pa; @@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq) #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) -static struct { - unsigned int dev_handle; - unsigned int dev_ino; - unsigned int in_use; -} irq_table[NR_IRQS]; -static DEFINE_SPINLOCK(irq_alloc_lock); +static unsigned long hvirq_major __initdata; +static int __init early_hvirq_major(char *p) +{ + int rc = kstrtoul(p, 10, &hvirq_major); + + return rc; +} +early_param("hvirq", early_hvirq_major); + +static int hv_irq_version; + +/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie + * based interfaces, but: + * + * 1) Several OSs, Solaris and Linux included, use them even when only + * negotiating version 1.0 (or failing to negotiate at all). So the + * hypervisor has a workaround that provides the VIRQ interfaces even + * when only verion 1.0 of the API is in use. + * + * 2) Second, and more importantly, with major version 2.0 these VIRQ + * interfaces only were actually hooked up for LDC interrupts, even + * though the Hypervisor specification clearly stated: + * + * The new interrupt API functions will be available to a guest + * when it negotiates version 2.0 in the interrupt API group 0x2. When + * a guest negotiates version 2.0, all interrupt sources will only + * support using the cookie interface, and any attempt to use the + * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the + * ENOTSUPPORTED error being returned. + * + * with an emphasis on "all interrupt sources". + * + * To correct this, major version 3.0 was created which does actually + * support VIRQs for all interrupt sources (not just LDC devices). So + * if we want to move completely over the cookie based VIRQs we must + * negotiate major version 3.0 or later of HV_GRP_INTR. + */ +static bool sun4v_cookie_only_virqs(void) +{ + if (hv_irq_version >= 3) + return true; + return false; +} -unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +static void __init irq_init_hv(void) { - unsigned long flags; - unsigned char ent; + unsigned long hv_error, major, minor = 0; + + if (tlb_type != hypervisor) + return; - BUILD_BUG_ON(NR_IRQS >= 256); + if (hvirq_major) + major = hvirq_major; + else + major = 3; - spin_lock_irqsave(&irq_alloc_lock, flags); + hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor); + if (!hv_error) + hv_irq_version = major; + else + hv_irq_version = 1; - for (ent = 1; ent < NR_IRQS; ent++) { - if (!irq_table[ent].in_use) + pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n", + hv_irq_version, + sun4v_cookie_only_virqs() ? "enabled" : "disabled"); +} + +/* This function is for the timer interrupt.*/ +int __init arch_probe_nr_irqs(void) +{ + return 1; +} + +#define DEFAULT_NUM_IVECS (0xfffU) +static unsigned int nr_ivec = DEFAULT_NUM_IVECS; +#define NUM_IVECS (nr_ivec) + +static unsigned int __init size_nr_ivec(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + /* Athena's devhandle|devino is large.*/ + case SUN4V_CHIP_SPARC64X: + nr_ivec = 0xffff; break; + } } - if (ent >= NR_IRQS) { - printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); - ent = 0; - } else { - irq_table[ent].dev_handle = dev_handle; - irq_table[ent].dev_ino = dev_ino; - irq_table[ent].in_use = 1; - } + return nr_ivec; +} + +struct irq_handler_data { + union { + struct { + unsigned int dev_handle; + unsigned int dev_ino; + }; + unsigned long sysino; + }; + struct ino_bucket bucket; + unsigned long iclr; + unsigned long imap; +}; + +static inline unsigned int irq_data_to_handle(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_handle; +} + +static inline unsigned int irq_data_to_ino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return ihd->dev_ino; +} + +static inline unsigned long irq_data_to_sysino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - return ent; + return ihd->sysino; } -#ifdef CONFIG_PCI_MSI void irq_free(unsigned int irq) { - unsigned long flags; + void *data = irq_get_handler_data(irq); - if (irq >= NR_IRQS) - return; + kfree(data); + irq_set_handler_data(irq, NULL); + irq_free_descs(irq, 1); +} - spin_lock_irqsave(&irq_alloc_lock, flags); +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +{ + int irq; - irq_table[irq].in_use = 0; + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + if (irq <= 0) + goto out; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return irq; +out: + return 0; +} + +static unsigned int cookie_exists(u32 devhandle, unsigned int devino) +{ + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + unsigned int irq = 0U; + + hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie); + if (hv_err) { + pr_err("HV get cookie failed hv_err = %ld\n", hv_err); + goto out; + } + + if (cookie & ((1UL << 63UL))) { + cookie = ~cookie; + bucket = (struct ino_bucket *) __va(cookie); + irq = bucket->__irq; + } +out: + return irq; +} + +static unsigned int sysino_exists(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + struct ino_bucket *bucket; + unsigned int irq; + + bucket = &ivector_table[sysino]; + irq = bucket_get_irq(__pa(bucket)); + + return irq; +} + +void ack_bad_irq(unsigned int irq) +{ + pr_crit("BAD IRQ ack %d\n", irq); +} + +void irq_install_pre_handler(int irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + pr_warn("IRQ pre handler NOT supported.\n"); } -#endif /* * /proc/interrupts printing: @@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) return tid; } -struct irq_handler_data { - unsigned long iclr; - unsigned long imap; - - void (*pre_handler)(unsigned int, void *, void *); - void *arg1; - void *arg2; -}; - #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) { @@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data) static void sun4v_irq_enable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data) static int sun4v_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, mask); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data, static void sun4v_irq_disable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); @@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data) static void sun4v_irq_eoi(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); @@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data) static void sun4v_virq_enable(struct irq_data *data) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, data->affinity); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data) static int sun4v_virt_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, mask); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data, static void sun4v_virq_disable(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_DISABLED); @@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data) static void sun4v_virq_eoi(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) @@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = { .flags = IRQCHIP_EOI_IF_HANDLED, }; -static void pre_flow_handler(struct irq_data *d) -{ - struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d); - unsigned int ino = irq_table[d->irq].dev_ino; - - handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2); -} - -void irq_install_pre_handler(int irq, - void (*func)(unsigned int, void *, void *), - void *arg1, void *arg2) -{ - struct irq_handler_data *handler_data = irq_get_handler_data(irq); - - handler_data->pre_handler = func; - handler_data->arg1 = arg1; - handler_data->arg2 = arg2; - - __irq_set_preflow_handler(irq, pre_flow_handler); -} - unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) { - struct ino_bucket *bucket; struct irq_handler_data *handler_data; + struct ino_bucket *bucket; unsigned int irq; int ino; @@ -537,119 +641,166 @@ out: return irq; } -static unsigned int sun4v_build_common(unsigned long sysino, - struct irq_chip *chip) +static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino, + void (*handler_data_init)(struct irq_handler_data *data, + u32 devhandle, unsigned int devino), + struct irq_chip *chip) { - struct ino_bucket *bucket; - struct irq_handler_data *handler_data; + struct irq_handler_data *data; unsigned int irq; - BUG_ON(tlb_type != hypervisor); + irq = irq_alloc(devhandle, devino); + if (!irq) + goto out; - bucket = &ivector_table[sysino]; - irq = bucket_get_irq(__pa(bucket)); - if (!irq) { - irq = irq_alloc(0, sysino); - bucket_set_irq(__pa(bucket), irq); - irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, - "IVEC"); + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + pr_err("IRQ handler data allocation failed.\n"); + irq_free(irq); + irq = 0; + goto out; } - handler_data = irq_get_handler_data(irq); - if (unlikely(handler_data)) - goto out; + irq_set_handler_data(irq, data); + handler_data_init(data, devhandle, devino); + irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC"); + data->imap = ~0UL; + data->iclr = ~0UL; +out: + return irq; +} - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) { - prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); - prom_halt(); - } - irq_set_handler_data(irq, handler_data); +static unsigned long cookie_assign(unsigned int irq, u32 devhandle, + unsigned int devino) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + unsigned long hv_error, cookie; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. + /* handler_irq needs to find the irq. cookie is seen signed in + * sun4v_dev_mondo and treated as a non ivector_table delivery. */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + ihd->bucket.__irq = irq; + cookie = ~__pa(&ihd->bucket); -out: - return irq; + hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_error) + pr_err("HV vintr set cookie failed = %ld\n", hv_error); + + return hv_error; } -unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +static void cookie_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) { - unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + data->dev_handle = devhandle; + data->dev_ino = devino; +} - return sun4v_build_common(sysino, &sun4v_irq); +static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned long hv_error; + unsigned int irq; + + irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip); + + hv_error = cookie_assign(irq, devhandle, devino); + if (hv_error) { + irq_free(irq); + irq = 0; + } + + return irq; } -unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino) { - struct irq_handler_data *handler_data; - unsigned long hv_err, cookie; - struct ino_bucket *bucket; unsigned int irq; - bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); - if (unlikely(!bucket)) - return 0; + irq = cookie_exists(devhandle, devino); + if (irq) + goto out; - /* The only reference we store to the IRQ bucket is - * by physical address which kmemleak can't see, tell - * it that this object explicitly is not a leak and - * should be scanned. - */ - kmemleak_not_leak(bucket); + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); - __flush_dcache_range((unsigned long) bucket, - ((unsigned long) bucket + - sizeof(struct ino_bucket))); +out: + return irq; +} - irq = irq_alloc(devhandle, devino); +static void sysino_set_bucket(unsigned int irq) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino); + BUG_ON(sysino >= nr_ivec); + bucket = &ivector_table[sysino]; bucket_set_irq(__pa(bucket), irq); +} - irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq, - "IVEC"); +static void sysino_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) +{ + unsigned long sysino; - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) - return 0; + sysino = sun4v_devino_to_sysino(devhandle, devino); + data->sysino = sysino; +} - /* In order to make the LDC channel startup sequence easier, - * especially wrt. locking, we do not let request_irq() enable - * the interrupt. - */ - irq_set_status_flags(irq, IRQ_NOAUTOEN); - irq_set_handler_data(irq, handler_data); +static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned int irq; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. - */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip); + if (!irq) + goto out; - cookie = ~__pa(bucket); - hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); - if (hv_err) { - prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " - "err=%lu\n", devhandle, devino, hv_err); - prom_halt(); - } + sysino_set_bucket(irq); +out: + return irq; +} +static int sun4v_build_sysino(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = sysino_exists(devhandle, devino); + if (irq) + goto out; + + irq = sysino_build_irq(devhandle, devino, &sun4v_irq); +out: return irq; } -void ack_bad_irq(unsigned int irq) +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) { - unsigned int ino = irq_table[irq].dev_ino; + unsigned int irq; - if (!ino) - ino = 0xdeadbeef; + if (sun4v_cookie_only_virqs()) + irq = sun4v_build_cookie(devhandle, devino); + else + irq = sun4v_build_sysino(devhandle, devino); - printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n", - ino, irq); + return irq; +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); + if (!irq) + goto out; + + /* This is borrowed from the original function. + */ + irq_set_status_flags(irq, IRQ_NOAUTOEN); + +out: + return irq; } void *hardirq_stack[NR_CPUS]; @@ -720,9 +871,12 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_data *data; unsigned long flags; + if (!desc) + continue; + data = irq_desc_get_irq_data(desc); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && !irqd_is_per_cpu(data)) { if (data->chip->irq_set_affinity) @@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action = { .name = "timer", }; -/* Only invoked on boot processor. */ -void __init init_IRQ(void) +static void __init irq_ivector_init(void) { - unsigned long size; + unsigned long size, order; + unsigned int ivecs; - map_prom_timers(); - kill_prom_timer(); + /* If we are doing cookie only VIRQs then we do not need the ivector + * table to process interrupts. + */ + if (sun4v_cookie_only_virqs()) + return; - size = sizeof(struct ino_bucket) * NUM_IVECS; - ivector_table = kzalloc(size, GFP_KERNEL); + ivecs = size_nr_ivec(); + size = sizeof(struct ino_bucket) * ivecs; + order = get_order(size); + ivector_table = (struct ino_bucket *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!ivector_table) { prom_printf("Fatal error, cannot allocate ivector_table\n"); prom_halt(); @@ -940,6 +1100,15 @@ void __init init_IRQ(void) ((unsigned long) ivector_table) + size); ivector_table_pa = __pa(ivector_table); +} + +/* Only invoked on boot processor.*/ +void __init init_IRQ(void) +{ + irq_init_hv(); + irq_ivector_init(); + map_prom_timers(); + kill_prom_timer(); if (tlb_type == hypervisor) sun4v_init_mondo_queues(); diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 605d49204580..ef0d8e9e1210 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_itlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -118,6 +110,12 @@ kvmap_dtlb_obp: ba,pt %xcc, kvmap_dtlb_load nop +kvmap_linear_early: + sethi %hi(kern_linear_pte_xor), %g7 + ldx [%g7 + %lo(kern_linear_pte_xor)], %g2 + ba,pt %xcc, kvmap_dtlb_tsb4m_load + xor %g2, %g4, %g5 + .align 32 kvmap_dtlb_tsb4m_load: TSB_LOCK_TAG(%g1, %g2, %g7) @@ -146,105 +144,17 @@ kvmap_dtlb_4v: /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) #endif - /* TSB entry address left in %g1, lookup linear PTE. - * Must preserve %g1 and %g6 (TAG). - */ -kvmap_dtlb_tsb4m_miss: - /* Clear the PAGE_OFFSET top virtual bits, shift - * down to get PFN, and make sure PFN is in range. - */ -661: sllx %g4, 0, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - /* Check to see if we know about valid memory at the 4MB - * chunk this physical address will reside within. + /* Linear mapping TSB lookup failed. Fallthrough to kernel + * page table based lookup. */ -661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - brnz,pn %g2, kvmap_dtlb_longpath - nop - - /* This unconditional branch and delay-slot nop gets patched - * by the sethi sequence once the bitmap is properly setup. - */ - .globl valid_addr_bitmap_insn -valid_addr_bitmap_insn: - ba,pt %xcc, 2f - nop - .subsection 2 - .globl valid_addr_bitmap_patch -valid_addr_bitmap_patch: - sethi %hi(sparc64_valid_addr_bitmap), %g7 - or %g7, %lo(sparc64_valid_addr_bitmap), %g7 - .previous - -661: srlx %g5, ILOG2_4MB, %g2 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - srlx %g2, 6, %g5 - and %g2, 63, %g2 - sllx %g5, 3, %g5 - ldx [%g7 + %g5], %g5 - mov 1, %g7 - sllx %g7, %g2, %g7 - andcc %g5, %g7, %g0 - be,pn %xcc, kvmap_dtlb_longpath - -2: sethi %hi(kpte_linear_bitmap), %g2 - - /* Get the 256MB physical address index. */ -661: sllx %g4, 0, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - or %g2, %lo(kpte_linear_bitmap), %g2 - -661: srlx %g5, ILOG2_256MB, %g5 - .section .page_offset_shift_patch, "ax" - .word 661b - .previous - - and %g5, (32 - 1), %g7 - - /* Divide by 32 to get the offset into the bitmask. */ - srlx %g5, 5, %g5 - add %g7, %g7, %g7 - sllx %g5, 3, %g5 - - /* kern_linear_pte_xor[(mask >> shift) & 3)] */ - ldx [%g2 + %g5], %g2 - srlx %g2, %g7, %g7 - sethi %hi(kern_linear_pte_xor), %g5 - and %g7, 3, %g7 - or %g5, %lo(kern_linear_pte_xor), %g5 - sllx %g7, 3, %g7 - ldx [%g5 + %g7], %g2 - .globl kvmap_linear_patch kvmap_linear_patch: - ba,pt %xcc, kvmap_dtlb_tsb4m_load - xor %g2, %g4, %g5 + ba,a,pt %xcc, kvmap_linear_early kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_dtlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -276,13 +186,8 @@ kvmap_dtlb_load: #ifdef CONFIG_SPARSEMEM_VMEMMAP kvmap_vmemmap: - sub %g4, %g5, %g5 - srlx %g5, ILOG2_4MB, %g5 - sethi %hi(vmemmap_table), %g1 - sllx %g5, 3, %g5 - or %g1, %lo(vmemmap_table), %g1 - ba,pt %xcc, kvmap_dtlb_load - ldx [%g1 + %g5], %g5 + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + ba,a,pt %xcc, kvmap_dtlb_load #endif kvmap_dtlb_nonlinear: @@ -294,8 +199,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMEMMAP_BASE), %g5 + ldx [%g5 + %lo(VMEMMAP_BASE)], %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -307,8 +212,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMALLOC_END), %g5 + ldx [%g5 + %lo(VMALLOC_END)], %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 66dacd56bb10..27bb55485472 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp) struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg) + void *event_arg, + const char *name) { struct ldc_channel *lp; const struct ldc_mode_ops *mops; @@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id, err = -EINVAL; if (!cfgp) goto out_err; + if (!name) + goto out_err; switch (cfgp->mode) { case LDC_MODE_RAW: @@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id, INIT_HLIST_HEAD(&lp->mh_list); + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, + lp->rx_irq_name, lp); + if (err) + goto out_free_txq; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, + lp->tx_irq_name, lp); + if (err) { + free_irq(lp->cfg.rx_irq, lp); + goto out_free_txq; + } + return lp; out_free_txq: @@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free); * state. This does not initiate a handshake, ldc_connect() does * that. */ -int ldc_bind(struct ldc_channel *lp, const char *name) +int ldc_bind(struct ldc_channel *lp) { unsigned long hv_err, flags; int err = -EINVAL; - if (!name || - (lp->state != LDC_STATE_INIT)) + if (lp->state != LDC_STATE_INIT) return -EINVAL; - snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); - snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - - err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, - lp->rx_irq_name, lp); - if (err) - return err; - - err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, - lp->tx_irq_name, lp); - if (err) { - free_irq(lp->cfg.rx_irq, lp); - return err; - } - - spin_lock_irqsave(&lp->lock, flags); enable_irq(lp->cfg.rx_irq); diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 269af58497aa..7e967c8018c8 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 n5_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_t5_get_perfreg(reg_num, &val); + + return val; +} + +static void n5_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_t5_set_perfreg(reg_num, val); +} + +static const struct pcr_ops n5_pcr_ops = { + .read_pcr = n5_pcr_read, + .write_pcr = n5_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; + + static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; static unsigned long perf_hsvc_minor; static int __init register_perf_hsvc(void) { + unsigned long hverror; + if (tlb_type == hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: @@ -215,6 +244,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_VT_CPU; break; + case SUN4V_CHIP_NIAGARA5: + perf_hsvc_group = HV_GRP_T5_CPU; + break; + default: return -ENODEV; } @@ -222,10 +255,12 @@ static int __init register_perf_hsvc(void) perf_hsvc_major = 1; perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register hvapi.\n"); + hverror = sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor); + if (hverror) { + pr_err("perfmon: Could not register hvapi(0x%lx).\n", + hverror); return -ENODEV; } } @@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n4_pcr_ops; break; + case SUN4V_CHIP_NIAGARA5: + pcr_ops = &n5_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d35c490a91cb..c9759ad3f34a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } - if (!strcmp(sparc_pmu_type, "niagara4")) { + if (!strcmp(sparc_pmu_type, "niagara4") || + !strcmp(sparc_pmu_type, "niagara5")) { sparc_pmu = &niagara4_pmu; return true; } diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 3fdb455e3318..61a519808cb7 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void __init per_cpu_patch(void) +static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; unsigned long ver; @@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } -void __init sun4v_patch(void) +static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -335,14 +336,25 @@ static void __init pause_patch(void) } } -#ifdef CONFIG_SMP -void __init boot_cpu_id_too_large(int cpu) +void __init start_early_boot(void) { - prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", - cpu, NR_CPUS); - prom_halt(); + int cpu; + + check_if_starfire(); + per_cpu_patch(); + sun4v_patch(); + + cpu = hard_smp_processor_id(); + if (cpu >= NR_CPUS) { + prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", + cpu, NR_CPUS); + prom_halt(); + } + current_thread_info()->cpu = cpu; + + prom_init_report(); + start_kernel(); } -#endif /* On Ultra, we support all of the v8 capabilities. */ unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | @@ -500,12 +512,16 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; } @@ -533,6 +549,8 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | @@ -540,6 +558,8 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f7ba87543e5f..c9300bfaee5a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1467,6 +1467,13 @@ static void __init pcpu_populate_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); + } + pud = pud_offset(pgd, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index e0c09bf85610..6179e19bc9b9 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -195,6 +195,11 @@ sun4v_tsb_miss_common: ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 sun4v_itlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 + sethi %hi(sun4v_err_itlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] sethi %hi(sun4v_err_itlb_ctx), %g1 @@ -206,15 +211,10 @@ sun4v_itlb_error: sethi %hi(sun4v_err_itlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_itlb_error_report add %sp, PTREGS_OFF, %o0 @@ -222,6 +222,11 @@ sun4v_itlb_error: /* NOTREACHED */ sun4v_dtlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 + sethi %hi(sun4v_err_dtlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] sethi %hi(sun4v_err_dtlb_ctx), %g1 @@ -233,21 +238,23 @@ sun4v_dtlb_error: sethi %hi(sun4v_err_dtlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_dtlb_error_report add %sp, PTREGS_OFF, %o0 /* NOTREACHED */ +sun4v_bad_ra: + or %g0, %g4, %g5 + ba,pt %xcc, sparc64_realfault_common + or %g1, %g0, %g4 + + /* NOTREACHED */ + /* Instruction Access Exception, tl0. */ sun4v_iacc: ldxa [%g0] ASI_SCRATCHPAD, %g2 diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S index 737f8cbc7d56..88ede1d53b4c 100644 --- a/arch/sparc/kernel/trampoline_64.S +++ b/arch/sparc/kernel/trampoline_64.S @@ -109,10 +109,13 @@ startup_continue: brnz,pn %g1, 1b nop - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x10], %l2 - add %l2, -(192 + 128), %sp + /* Get onto temporary stack which will be in the locked + * kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp flushw /* Setup the loop variables: @@ -394,7 +397,6 @@ after_lock_tlb: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index fb6640ec8557..981a769b9558 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) atomic_inc(&sun4v_nonresum_oflow_cnt); } +static void sun4v_tlb_error(struct pt_regs *regs) +{ + die_if_kernel("TLB/TSB error", regs); +} + unsigned long sun4v_err_itlb_vaddr; unsigned long sun4v_err_itlb_ctx; unsigned long sun4v_err_itlb_pte; @@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error; void sun4v_itlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, sun4v_err_itlb_pte, sun4v_err_itlb_error); - prom_halt(); + sun4v_tlb_error(regs); } unsigned long sun4v_err_dtlb_vaddr; @@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error; void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, sun4v_err_dtlb_pte, sun4v_err_dtlb_error); - prom_halt(); + sun4v_tlb_error(regs); } void hypervisor_tlbop_error(unsigned long err, unsigned long op) diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 14158d40ba76..be98685c14c6 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath: nop .previous - rdpr %tl, %g3 - cmp %g3, 1 + rdpr %tl, %g7 + cmp %g7, 1 bne,pn %xcc, winfix_trampoline - nop + mov %g3, %g4 ba,pt %xcc, etrap rd %pc, %g7 call hugetlb_setup diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index f8e7dd53e1c7..9c5fbd0b8a04 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio, cfg.tx_irq = vio->vdev->tx_irq; cfg.rx_irq = vio->vdev->rx_irq; - lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state *vio) err = 0; if (state == LDC_STATE_INIT) { - err = ldc_bind(vio->lp, vio->name); + err = ldc_bind(vio->lp); if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 932ff90fd760..09243057cb0b 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -35,8 +35,9 @@ jiffies = jiffies_64; SECTIONS { - /* swapper_low_pmd_dir is sparc64 only */ - swapper_low_pmd_dir = 0x0000000000402000; +#ifdef CONFIG_SPARC64 + swapper_pg_dir = 0x0000000000402000; +#endif . = INITIAL_ADDRESS; .text TEXTSTART : { @@ -122,11 +123,6 @@ SECTIONS *(.swapper_4m_tsb_phys_patch) __swapper_4m_tsb_phys_patch_end = .; } - .page_offset_shift_patch : { - __page_offset_shift_patch = .; - *(.page_offset_shift_patch) - __page_offset_shift_patch_end = .; - } .popc_3insn_patch : { __popc_3insn_patch = .; *(.popc_3insn_patch) diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 9cf2ee01cee3..140527a20e7d 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -41,6 +41,10 @@ #endif #endif +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + #ifndef EX_LD #define EX_LD(x) x #endif @@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o3), %o0 .Llarge_src_unaligned: +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail) +#else + VISEntryHalf +#endif andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 - VISEntryHalf alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) @@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %icc, .Lmedium_unaligned +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail: + or %o0, %o1, %g2 +#endif .Lmedium: LOAD(prefetch, %o1 + 0x40, #n_reads_strong) andcc %g2, 0x7, %g0 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 99c017be8719..f75e6906df14 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -3,8 +3,9 @@ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * - * Returns 0, if ok, and number of bytes not yet set if exception - * occurs and we were called as clear_user. + * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and + * number of bytes not yet set if exception occurs and we were called as + * clear_user. */ #include @@ -65,6 +66,8 @@ __bzero_begin: .globl __memset_start, __memset_end __memset_start: memset: + mov %o0, %g1 + mov 1, %g4 and %o1, 0xff, %g3 sll %g3, 8, %g2 or %g3, %g2, %g3 @@ -89,6 +92,7 @@ memset: sub %o0, %o2, %o0 __bzero: + clr %g4 mov %g0, %g3 1: cmp %o1, 7 @@ -151,8 +155,8 @@ __bzero: bne,a 8f EX(stb %g3, [%o0], and %o1, 1) 8: - retl - clr %o0 + b 0f + nop 7: be 13b orcc %o1, 0, %g0 @@ -164,6 +168,12 @@ __bzero: bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: + andcc %g4, 1, %g0 + be 5f + nop + retl + mov %g1, %o0 +5: retl clr %o0 __memset_end: diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 587cd0565128..18fcd7167095 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -346,6 +346,9 @@ retry: down_read(&mm->mmap_sem); } + if (fault_code & FAULT_CODE_BAD_RA) + goto do_sigbus; + vma = find_vma(mm, address); if (!vma) goto bad_area; diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 1aed0432c64b..ae6ce383d4df 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 98ac8e80adae..04bc826135b4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly; * 'cpu' properties, but we need to have this table setup before the * MDESC is initialized. */ -unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. @@ -84,10 +83,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; static unsigned long cpu_pgsz_mask; -#define MAX_BANKS 32 +#define MAX_BANKS 1024 static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; @@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property, cmp_p64, NULL); } -unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / - sizeof(unsigned long)]; -EXPORT_SYMBOL(sparc64_valid_addr_bitmap); - /* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; @@ -840,7 +836,10 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - return -1; + /* The following condition has been observed on LDOM guests.*/ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" + " rule. Some physical memory will be owned by node 0."); + return 0; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1366,9 +1365,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base) static struct linux_prom64_registers pall[MAX_BANKS] __initdata; static int pall_ents __initdata; -#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long max_phys_bits = 40; + +bool kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + + if ((addr >> max_phys_bits) != 0UL) + return false; + + return pfn_valid(pa >> PAGE_SHIFT); + } + + if (addr >= (unsigned long) KERNBASE && + addr < (unsigned long)&_end) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + if (pmd_large(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +EXPORT_SYMBOL(kern_addr_valid); + +static unsigned long __ref kernel_map_hugepud(unsigned long vstart, + unsigned long vend, + pud_t *pud) +{ + const unsigned long mask16gb = (1UL << 34) - 1UL; + u64 pte_val = vstart; + + /* Each PUD is 8GB */ + if ((vstart & mask16gb) || + (vend - vstart <= mask16gb)) { + pte_val ^= kern_linear_pte_xor[2]; + pud_val(*pud) = pte_val | _PAGE_PUD_HUGE; + + return vstart + PUD_SIZE; + } + + pte_val ^= kern_linear_pte_xor[3]; + pte_val |= _PAGE_PUD_HUGE; + + vend = vstart + mask16gb + 1UL; + while (vstart < vend) { + pud_val(*pud) = pte_val; + + pte_val += PUD_SIZE; + vstart += PUD_SIZE; + pud++; + } + return vstart; +} + +static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE) + return true; + + return false; +} + +static unsigned long __ref kernel_map_hugepmd(unsigned long vstart, + unsigned long vend, + pmd_t *pmd) +{ + const unsigned long mask256mb = (1UL << 28) - 1UL; + const unsigned long mask2gb = (1UL << 31) - 1UL; + u64 pte_val = vstart; + + /* Each PMD is 8MB */ + if ((vstart & mask256mb) || + (vend - vstart <= mask256mb)) { + pte_val ^= kern_linear_pte_xor[0]; + pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE; + + return vstart + PMD_SIZE; + } + + if ((vstart & mask2gb) || + (vend - vstart <= mask2gb)) { + pte_val ^= kern_linear_pte_xor[1]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask256mb + 1UL; + } else { + pte_val ^= kern_linear_pte_xor[2]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask2gb + 1UL; + } + + while (vstart < vend) { + pmd_val(*pmd) = pte_val; + + pte_val += PMD_SIZE; + vstart += PMD_SIZE; + pmd++; + } + + return vstart; +} + +static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE) + return true; + + return false; +} + static unsigned long __ref kernel_map_range(unsigned long pstart, - unsigned long pend, pgprot_t prot) + unsigned long pend, pgprot_t prot, + bool use_huge) { unsigned long vstart = PAGE_OFFSET + pstart; unsigned long vend = PAGE_OFFSET + pend; @@ -1387,19 +1521,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, pmd_t *pmd; pte_t *pte; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pgd_populate(&init_mm, pgd, new); + } pud = pud_offset(pgd, vstart); if (pud_none(*pud)) { pmd_t *new; + if (kernel_can_map_hugepud(vstart, vend, use_huge)) { + vstart = kernel_map_hugepud(vstart, vend, pud); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pud_populate(&init_mm, pud, new); } pmd = pmd_offset(pud, vstart); - if (!pmd_present(*pmd)) { + if (pmd_none(*pmd)) { pte_t *new; + if (kernel_can_map_hugepmd(vstart, vend, use_huge)) { + vstart = kernel_map_hugepmd(vstart, vend, pmd); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pmd_populate_kernel(&init_mm, pmd, new); @@ -1422,100 +1571,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, return alloc_bytes; } -extern unsigned int kvmap_linear_patch[1]; -#endif /* CONFIG_DEBUG_PAGEALLOC */ - -static void __init kpte_set_val(unsigned long index, unsigned long val) +static void __init flush_all_kernel_tsbs(void) { - unsigned long *ptr = kpte_linear_bitmap; - - val <<= ((index % (BITS_PER_LONG / 2)) * 2); - ptr += (index / (BITS_PER_LONG / 2)); - - *ptr |= val; -} - -static const unsigned long kpte_shift_min = 28; /* 256MB */ -static const unsigned long kpte_shift_max = 34; /* 16GB */ -static const unsigned long kpte_shift_incr = 3; - -static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, - unsigned long shift) -{ - unsigned long size = (1UL << shift); - unsigned long mask = (size - 1UL); - unsigned long remains = end - start; - unsigned long val; - - if (remains < size || (start & mask)) - return start; - - /* VAL maps: - * - * shift 28 --> kern_linear_pte_xor index 1 - * shift 31 --> kern_linear_pte_xor index 2 - * shift 34 --> kern_linear_pte_xor index 3 - */ - val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; - - remains &= ~mask; - if (shift != kpte_shift_max) - remains = size; - - while (remains) { - unsigned long index = start >> kpte_shift_min; + int i; - kpte_set_val(index, val); + for (i = 0; i < KERNEL_TSB_NENTRIES; i++) { + struct tsb *ent = &swapper_tsb[i]; - start += 1UL << kpte_shift_min; - remains -= 1UL << kpte_shift_min; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#ifndef CONFIG_DEBUG_PAGEALLOC + for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) { + struct tsb *ent = &swapper_4m_tsb[i]; - return start; -} - -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) -{ - unsigned long smallest_size, smallest_mask; - unsigned long s; - - smallest_size = (1UL << kpte_shift_min); - smallest_mask = (smallest_size - 1UL); - - while (start < end) { - unsigned long orig_start = start; - - for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { - start = kpte_mark_using_shift(start, end, s); - - if (start != orig_start) - break; - } - - if (start == orig_start) - start = (start + smallest_size) & ~smallest_mask; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#endif } -static void __init init_kpte_bitmap(void) -{ - unsigned long i; - - for (i = 0; i < pall_ents; i++) { - unsigned long phys_start, phys_end; - - phys_start = pall[i].phys_addr; - phys_end = phys_start + pall[i].reg_size; - - mark_kpte_bitmap(phys_start, phys_end); - } -} +extern unsigned int kvmap_linear_patch[1]; static void __init kernel_physical_mapping_init(void) { -#ifdef CONFIG_DEBUG_PAGEALLOC unsigned long i, mem_alloced = 0UL; + bool use_huge = true; +#ifdef CONFIG_DEBUG_PAGEALLOC + use_huge = false; +#endif for (i = 0; i < pall_ents; i++) { unsigned long phys_start, phys_end; @@ -1523,7 +1606,7 @@ static void __init kernel_physical_mapping_init(void) phys_end = phys_start + pall[i].reg_size; mem_alloced += kernel_map_range(phys_start, phys_end, - PAGE_KERNEL); + PAGE_KERNEL, use_huge); } printk("Allocated %ld bytes for kernel page tables.\n", @@ -1532,8 +1615,9 @@ static void __init kernel_physical_mapping_init(void) kvmap_linear_patch[0] = 0x01000000; /* nop */ flushi(&kvmap_linear_patch[0]); + flush_all_kernel_tsbs(); + __flush_tlb_all(); -#endif } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -1543,7 +1627,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); kernel_map_range(phys_start, phys_end, - (enable ? PAGE_KERNEL : __pgprot(0))); + (enable ? PAGE_KERNEL : __pgprot(0)), false); flush_tsb_kernel_range(PAGE_OFFSET + phys_start, PAGE_OFFSET + phys_end); @@ -1571,76 +1655,56 @@ unsigned long __init find_ecache_flush_span(unsigned long size) unsigned long PAGE_OFFSET; EXPORT_SYMBOL(PAGE_OFFSET); -static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) -{ - unsigned long final_shift; - unsigned int val = *insn; - unsigned int cnt; - - /* We are patching in ilog2(max_supported_phys_address), and - * we are doing so in a manner similar to a relocation addend. - * That is, we are adding the shift value to whatever value - * is in the shift instruction count field already. - */ - cnt = (val & 0x3f); - val &= ~0x3f; - - /* If we are trying to shift >= 64 bits, clear the destination - * register. This can happen when phys_bits ends up being equal - * to MAX_PHYS_ADDRESS_BITS. - */ - final_shift = (cnt + (64 - phys_bits)); - if (final_shift >= 64) { - unsigned int rd = (val >> 25) & 0x1f; - - val = 0x80100000 | (rd << 25); - } else { - val |= final_shift; - } - *insn = val; - - __asm__ __volatile__("flush %0" - : /* no outputs */ - : "r" (insn)); -} - -static void __init page_offset_shift_patch(unsigned long phys_bits) -{ - extern unsigned int __page_offset_shift_patch; - extern unsigned int __page_offset_shift_patch_end; - unsigned int *p; - - p = &__page_offset_shift_patch; - while (p < &__page_offset_shift_patch_end) { - unsigned int *insn = (unsigned int *)(unsigned long)*p; +unsigned long VMALLOC_END = 0x0000010000000000UL; +EXPORT_SYMBOL(VMALLOC_END); - page_offset_shift_patch_one(insn, phys_bits); - - p++; - } -} +unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; +unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; static void __init setup_page_offset(void) { - unsigned long max_phys_bits = 40; - if (tlb_type == cheetah || tlb_type == cheetah_plus) { + /* Cheetah/Panther support a full 64-bit virtual + * address, so we can use all that our page tables + * support. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + max_phys_bits = 42; } else if (tlb_type == hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: case SUN4V_CHIP_NIAGARA2: + /* T1 and T2 support 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + max_phys_bits = 39; break; case SUN4V_CHIP_NIAGARA3: + /* T3 supports 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + max_phys_bits = 43; break; case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC64X: - default: + case SUN4V_CHIP_SPARC_M6: + /* T4 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; max_phys_bits = 47; break; + case SUN4V_CHIP_SPARC_M7: + default: + /* M7 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 49; + break; } } @@ -1650,12 +1714,16 @@ static void __init setup_page_offset(void) prom_halt(); } - PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); + PAGE_OFFSET = sparc64_va_hole_top; + VMALLOC_END = ((sparc64_va_hole_bottom >> 1) + + (sparc64_va_hole_bottom >> 2)); - pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", PAGE_OFFSET, max_phys_bits); - - page_offset_shift_patch(max_phys_bits); + pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n", + VMALLOC_START, VMALLOC_END); + pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n", + VMEMMAP_BASE, VMEMMAP_BASE << 1); } static void __init tsb_phys_patch(void) @@ -1700,21 +1768,42 @@ static void __init tsb_phys_patch(void) #define NUM_KTSB_DESCR 1 #endif static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; -extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +/* The swapper TSBs are loaded with a base sequence of: + * + * sethi %uhi(SYMBOL), REG1 + * sethi %hi(SYMBOL), REG2 + * or REG1, %ulo(SYMBOL), REG1 + * or REG2, %lo(SYMBOL), REG2 + * sllx REG1, 32, REG1 + * or REG1, REG2, REG1 + * + * When we use physical addressing for the TSB accesses, we patch the + * first four instructions in the above sequence. + */ static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) { - pa >>= KTSB_PHYS_SHIFT; + unsigned long high_bits, low_bits; + + high_bits = (pa >> 32) & 0xffffffff; + low_bits = (pa >> 0) & 0xffffffff; while (start < end) { unsigned int *ia = (unsigned int *)(unsigned long)*start; - ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia)); - ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 2)); + + ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 3)); + start++; } } @@ -1853,7 +1942,6 @@ static void __init sun4v_linear_pte_xor_finalize(void) /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; -pgd_t swapper_pg_dir[PTRS_PER_PGD]; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); @@ -1956,16 +2044,10 @@ void __init paging_init(void) */ init_mm.pgd += ((shift) / (sizeof(pgd_t))); - memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); - /* Now can init the kernel/bad page tables. */ - pud_set(pud_offset(&swapper_pg_dir[0], 0), - swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - inherit_prom_mappings(); - init_kpte_bitmap(); - /* Ok, we can use our TLB miss and window trap handlers safely. */ setup_tba(); @@ -2072,70 +2154,6 @@ int page_in_phys_avail(unsigned long paddr) return 0; } -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; -static int pavail_rescan_ents __initdata; - -/* Certain OBP calls, such as fetching "available" properties, can - * claim physical memory. So, along with initializing the valid - * address bitmap, what we do here is refetch the physical available - * memory list again, and make sure it provides at least as much - * memory as 'pavail' does. - */ -static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) -{ - int i; - - read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - - for (i = 0; i < pavail_ents; i++) { - unsigned long old_start, old_end; - - old_start = pavail[i].phys_addr; - old_end = old_start + pavail[i].reg_size; - while (old_start < old_end) { - int n; - - for (n = 0; n < pavail_rescan_ents; n++) { - unsigned long new_start, new_end; - - new_start = pavail_rescan[n].phys_addr; - new_end = new_start + - pavail_rescan[n].reg_size; - - if (new_start <= old_start && - new_end >= (old_start + PAGE_SIZE)) { - set_bit(old_start >> ILOG2_4MB, bitmap); - goto do_next_page; - } - } - - prom_printf("mem_init: Lost memory in pavail\n"); - prom_printf("mem_init: OLD start[%lx] size[%lx]\n", - pavail[i].phys_addr, - pavail[i].reg_size); - prom_printf("mem_init: NEW start[%lx] size[%lx]\n", - pavail_rescan[i].phys_addr, - pavail_rescan[i].reg_size); - prom_printf("mem_init: Cannot continue, aborting.\n"); - prom_halt(); - - do_next_page: - old_start += PAGE_SIZE; - } - } -} - -static void __init patch_tlb_miss_handler_bitmap(void) -{ - extern unsigned int valid_addr_bitmap_insn[]; - extern unsigned int valid_addr_bitmap_patch[]; - - valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; - mb(); - valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; - flushi(&valid_addr_bitmap_insn[0]); -} - static void __init register_page_bootmem_info(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -2148,18 +2166,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long addr, last; - - addr = PAGE_OFFSET + kern_base; - last = PAGE_ALIGN(kern_size) + addr; - while (addr < last) { - set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap); - addr += PAGE_SIZE; - } - - setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); - patch_tlb_miss_handler_bitmap(); - high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); @@ -2249,18 +2255,9 @@ unsigned long _PAGE_CACHE __read_mostly; EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP -unsigned long vmemmap_table[VMEMMAP_SIZE]; - -static long __meminitdata addr_start, addr_end; -static int __meminitdata node_start; - int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, int node) { - unsigned long phys_start = (vstart - VMEMMAP_BASE); - unsigned long phys_end = (vend - VMEMMAP_BASE); - unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; - unsigned long end = VMEMMAP_ALIGN(phys_end); unsigned long pte_base; pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | @@ -2271,47 +2268,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); - for (; addr < end; addr += VMEMMAP_CHUNK) { - unsigned long *vmem_pp = - vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); - void *block; + pte_base |= _PAGE_PMD_HUGE; - if (!(*vmem_pp & _PAGE_VALID)) { - block = vmemmap_alloc_block(1UL << ILOG2_4MB, node); - if (!block) + vstart = vstart & PMD_MASK; + vend = ALIGN(vend, PMD_SIZE); + for (; vstart < vend; vstart += PMD_SIZE) { + pgd_t *pgd = pgd_offset_k(vstart); + unsigned long pte; + pud_t *pud; + pmd_t *pmd; + + if (pgd_none(*pgd)) { + pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); + + if (!new) return -ENOMEM; + pgd_populate(&init_mm, pgd, new); + } - *vmem_pp = pte_base | __pa(block); + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); - /* check to see if we have contiguous blocks */ - if (addr_end != addr || node_start != node) { - if (addr_start) - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = addr; - node_start = node; - } - addr_end = addr + VMEMMAP_CHUNK; + if (!new) + return -ENOMEM; + pud_populate(&init_mm, pud, new); } - } - return 0; -} -void __meminit vmemmap_populate_print_last(void) -{ - if (addr_start) { - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = 0; - addr_end = 0; - node_start = 0; + pmd = pmd_offset(pud, vstart); + + pte = pmd_val(*pmd); + if (!(pte & _PAGE_VALID)) { + void *block = vmemmap_alloc_block(PMD_SIZE, node); + + if (!block) + return -ENOMEM; + + pmd_val(*pmd) = pte_base | __pa(block); + } } + + return 0; } void vmemmap_free(unsigned long start, unsigned long end) { } - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, @@ -2787,8 +2789,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS); } if (end > HI_OBP_ADDRESS) { - flush_tsb_kernel_range(end, HI_OBP_ADDRESS); - do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS); + flush_tsb_kernel_range(HI_OBP_ADDRESS, end); + do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end); } } else { flush_tsb_kernel_range(start, end); diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 0668b364f44d..a4c09603b05c 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -8,15 +8,8 @@ */ #define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) -#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) -#define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) -#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) -#define VALID_ADDR_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) extern unsigned long kern_linear_pte_xor[4]; -extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_nuc_bits; @@ -38,15 +31,4 @@ extern unsigned long kern_locked_tte_data; void prom_world(int enter); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#define VMEMMAP_CHUNK_SHIFT 22 -#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) -#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) -#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) - -#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) -extern unsigned long vmemmap_table[VMEMMAP_SIZE]; -#endif - #endif /* _SPARC64_MM_INIT_H */ diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S index 79942166df84..d7d9017dcb15 100644 --- a/arch/sparc/power/hibernate_asm.S +++ b/arch/sparc/power/hibernate_asm.S @@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume) nop /* Write PAGE_OFFSET to %g7 */ - sethi %uhi(PAGE_OFFSET), %g7 - sllx %g7, 32, %g7 + sethi %hi(PAGE_OFFSET), %g7 + ldx [%g7 + %lo(PAGE_OFFSET)], %g7 setuw (PAGE_SIZE-8), %g3 diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c index ab9ccc63b388..7149e77714a4 100644 --- a/arch/sparc/prom/bootstr_64.c +++ b/arch/sparc/prom/bootstr_64.c @@ -14,7 +14,10 @@ * the .bss section or it will break things. */ -#define BARG_LEN 256 +/* We limit BARG_LEN to 1024 because this is the size of the + * 'barg_out' command line buffer in the SILO bootloader. + */ +#define BARG_LEN 1024 struct { int bootstr_len; int bootstr_valid; diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S index 9c86b4b7d429..8050f381f518 100644 --- a/arch/sparc/prom/cif.S +++ b/arch/sparc/prom/cif.S @@ -11,11 +11,10 @@ .text .globl prom_cif_direct prom_cif_direct: + save %sp, -192, %sp sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - ldx [%o1 + 0x0010], %o2 ! prom_cif_stack - save %o2, -192, %sp - ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + ldx [%o1 + 0x0008], %l2 ! prom_cif_handler mov %g4, %l0 mov %g5, %l1 mov %g6, %l3 diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c index d95db755828f..110b0d78b864 100644 --- a/arch/sparc/prom/init_64.c +++ b/arch/sparc/prom/init_64.c @@ -26,13 +26,13 @@ phandle prom_chosen_node; * It gets passed the pointer to the PROM vector. */ -extern void prom_cif_init(void *, void *); +extern void prom_cif_init(void *); -void __init prom_init(void *cif_handler, void *cif_stack) +void __init prom_init(void *cif_handler) { phandle node; - prom_cif_init(cif_handler, cif_stack); + prom_cif_init(cif_handler); prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || (s32)prom_chosen_node == -1) diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c index e58b81726319..545d8bb79b65 100644 --- a/arch/sparc/prom/p1275.c +++ b/arch/sparc/prom/p1275.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,6 @@ struct { long prom_callback; /* 0x00 */ void (*prom_cif_handler)(long *); /* 0x08 */ - unsigned long prom_cif_stack; /* 0x10 */ } p1275buf; extern void prom_world(int); @@ -36,8 +36,8 @@ void p1275_cmd_direct(unsigned long *args) { unsigned long flags; - raw_local_save_flags(flags); - raw_local_irq_restore((unsigned long)PIL_NMI); + local_save_flags(flags); + local_irq_restore((unsigned long)PIL_NMI); raw_spin_lock(&prom_entry_lock); prom_world(1); @@ -45,11 +45,10 @@ void p1275_cmd_direct(unsigned long *args) prom_world(0); raw_spin_unlock(&prom_entry_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void prom_cif_init(void *cif_handler, void *cif_stack) { p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; - p1275buf.prom_cif_stack = (unsigned long)cif_stack; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..92d3486a6196 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -481,6 +481,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 74e804ddc5c7..50ce7519ccef 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. + * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..1cd2a5fbde07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); /* - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation - * number. + * the low bit of the generation number is always presumed to be zero. + * This disables mmio caching during memslot updates. The concept is + * similar to a seqcount but instead of retrying the access we just punt + * and ignore the cache. + * + * spte bits 3-11 are used as bits 1-9 of the generation number, + * the bits 52-61 are used as bits 10-19 of the generation number. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_LOW_SHIFT 2 #define MMIO_SPTE_GEN_HIGH_SHIFT 52 -#define MMIO_GEN_SHIFT 19 -#define MMIO_GEN_LOW_SHIFT 9 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_SHIFT 20 +#define MMIO_GEN_LOW_SHIFT 10 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) @@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte) static unsigned int kvm_current_mmio_generation(struct kvm *kvm) { - /* - * Init kvm generation close to MMIO_MAX_GEN to easily test the - * code of handling generation number wrap-around. - */ - return (kvm_memslots(kvm)->generation + - MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; + return kvm_memslots(kvm)->generation & MMIO_GEN_MASK; } static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, @@ -3163,7 +3162,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -4433,7 +4432,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { + if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..6a118fa378b5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -453,6 +453,7 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; + unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -4235,11 +4236,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) u32 low32, high32; unsigned long tmpl; struct desc_ptr dt; + unsigned long cr4; vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ - vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ + /* Save the most likely value for this task's CR4 in the VMCS. */ + cr4 = read_cr4(); + vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ + vmx->host_state.vmcs_host_cr4 = cr4; + vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 /* @@ -7376,7 +7382,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr; + unsigned long debugctlmsr, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) @@ -7397,6 +7403,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + cr4 = read_cr4(); + if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->host_state.vmcs_host_cr4 = cr4; + } + /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 306a1b77581f..985fb2c006fa 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} + +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. */ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3c562f5a60bb..e1bce26cd4f9 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -78,8 +78,6 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap, tx = dma->device_prep_dma_xor(chan, dma_dest, src_list, xor_src_cnt, unmap->len, dma_flags); - src_list[0] = tmp; - if (unlikely(!tx)) async_tx_quiesce(&submit->depend_tx); @@ -92,6 +90,7 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap, xor_src_cnt, unmap->len, dma_flags); } + src_list[0] = tmp; dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index bf424305f3dc..3d785ebb48d3 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -1105,6 +1105,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (!firmware_p) return -EINVAL; + if (!name || name[0] == '\0') + return -EINVAL; + ret = _request_firmware_prepare(&fw, name, device); if (ret <= 0) /* error or already assigned */ goto out; diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 0c94b661c16f..5799a0b9e6cc 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -473,6 +473,7 @@ void regmap_debugfs_init(struct regmap *map, const char *name) { struct rb_node *next; struct regmap_range_node *range_node; + const char *devname = "dummy"; /* If we don't have the debugfs root yet, postpone init */ if (!regmap_debugfs_root) { @@ -491,12 +492,15 @@ void regmap_debugfs_init(struct regmap *map, const char *name) INIT_LIST_HEAD(&map->debugfs_off_cache); mutex_init(&map->cache_lock); + if (map->dev) + devname = dev_name(map->dev); + if (name) { map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_name(map->dev), name); + devname, name); name = map->debugfs_name; } else { - name = dev_name(map->dev); + name = devname; } map->debugfs = debugfs_create_dir(name, regmap_debugfs_root); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 1cf427bc0d4a..3a785a4f4ff6 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1408,7 +1408,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, } #ifdef LOG_DEVICE - if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0) + if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0) dev_info(map->dev, "%x <= %x\n", reg, val); #endif @@ -1659,6 +1659,9 @@ out: } else { void *wval; + if (!val_count) + return -EINVAL; + wval = kmemdup(val, val_count * val_bytes, GFP_KERNEL); if (!wval) { dev_err(map->dev, "Error in memory allocation\n"); @@ -2058,7 +2061,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg, ret = map->reg_read(context, reg, val); if (ret == 0) { #ifdef LOG_DEVICE - if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0) + if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0) dev_info(map->dev, "%x => %x\n", reg, *val); #endif diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 292c38e8aa17..f0ea79064d4f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -330,6 +330,9 @@ static void btusb_intr_complete(struct urb *urb) BT_ERR("%s corrupted event packet", hdev->name); hdev->stat.err_rx++; } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_INTR_RUNNING, &data->flags)) @@ -418,6 +421,9 @@ static void btusb_bulk_complete(struct urb *urb) BT_ERR("%s corrupted ACL packet", hdev->name); hdev->stat.err_rx++; } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_BULK_RUNNING, &data->flags)) @@ -512,6 +518,9 @@ static void btusb_isoc_complete(struct urb *urb) hdev->stat.err_rx++; } } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_ISOC_RUNNING, &data->flags)) diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index caacb422995d..a22838669b4e 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -237,7 +237,7 @@ static void h5_pkt_cull(struct h5 *h5) break; to_remove--; - seq = (seq - 1) % 8; + seq = (seq - 1) & 0x07; } if (seq != h5->rx_ack) diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c index 3b83b7dd78c7..5cd62a709ac7 100644 --- a/drivers/clk/qcom/gcc-ipq806x.c +++ b/drivers/clk/qcom/gcc-ipq806x.c @@ -32,6 +32,33 @@ #include "clk-branch.h" #include "reset.h" +static struct clk_pll pll0 = { + .l_reg = 0x30c4, + .m_reg = 0x30c8, + .n_reg = 0x30cc, + .config_reg = 0x30d4, + .mode_reg = 0x30c0, + .status_reg = 0x30d8, + .status_bit = 16, + .clkr.hw.init = &(struct clk_init_data){ + .name = "pll0", + .parent_names = (const char *[]){ "pxo" }, + .num_parents = 1, + .ops = &clk_pll_ops, + }, +}; + +static struct clk_regmap pll0_vote = { + .enable_reg = 0x34c0, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "pll0_vote", + .parent_names = (const char *[]){ "pll0" }, + .num_parents = 1, + .ops = &clk_pll_vote_ops, + }, +}; + static struct clk_pll pll3 = { .l_reg = 0x3164, .m_reg = 0x3168, @@ -154,7 +181,7 @@ static const u8 gcc_pxo_pll8_pll0[] = { static const char *gcc_pxo_pll8_pll0_map[] = { "pxo", "pll8_vote", - "pll0", + "pll0_vote", }; static struct freq_tbl clk_tbl_gsbi_uart[] = { @@ -2133,6 +2160,8 @@ static struct clk_branch usb_fs1_h_clk = { }; static struct clk_regmap *gcc_ipq806x_clks[] = { + [PLL0] = &pll0.clkr, + [PLL0_VOTE] = &pll0_vote, [PLL3] = &pll3.clkr, [PLL8] = &pll8.clkr, [PLL8_VOTE] = &pll8_vote, diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index d5149aacd2fe..026484ade10d 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2755,8 +2755,10 @@ probe_err3: list_del(&pch->chan.device_node); /* Flush the channel */ - pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); - pl330_free_chan_resources(&pch->chan); + if (pch->thread) { + pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); + pl330_free_chan_resources(&pch->chan); + } } probe_err2: pl330_del(pl330); @@ -2782,8 +2784,10 @@ static int pl330_remove(struct amba_device *adev) list_del(&pch->chan.device_node); /* Flush the channel */ - pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); - pl330_free_chan_resources(&pch->chan); + if (pch->thread) { + pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); + pl330_free_chan_resources(&pch->chan); + } } pl330_del(pl330); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index f4aec2e6ef56..7d3742edbaa2 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -633,7 +633,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_l2_isr, 0, + mpc85xx_l2_isr, IRQF_SHARED, "[EDAC] L2 err", edac_dev); if (res < 0) { printk(KERN_ERR diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 8389e8109218..3cccff73b9b9 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -320,10 +320,7 @@ static int rmi_f11_input_event(struct hid_device *hdev, u8 irq, u8 *data, int offset; int i; - if (size < hdata->f11.report_size) - return 0; - - if (!(irq & hdata->f11.irq_mask)) + if (!(irq & hdata->f11.irq_mask) || size <= 0) return 0; offset = (hdata->max_fingers >> 2) + 1; @@ -332,9 +329,19 @@ static int rmi_f11_input_event(struct hid_device *hdev, u8 irq, u8 *data, int fs_bit_position = (i & 0x3) << 1; int finger_state = (data[fs_byte_position] >> fs_bit_position) & 0x03; + int position = offset + 5 * i; + + if (position + 5 > size) { + /* partial report, go on with what we received */ + printk_once(KERN_WARNING + "%s %s: Detected incomplete finger report. Finger reports may occasionally get dropped on this platform.\n", + dev_driver_string(&hdev->dev), + dev_name(&hdev->dev)); + hid_dbg(hdev, "Incomplete finger report\n"); + break; + } - rmi_f11_process_touch(hdata, i, finger_state, - &data[offset + 5 * i]); + rmi_f11_process_touch(hdata, i, finger_state, &data[position]); } input_mt_sync_frame(hdata->input); input_sync(hdata->input); @@ -352,6 +359,11 @@ static int rmi_f30_input_event(struct hid_device *hdev, u8 irq, u8 *data, if (!(irq & hdata->f30.irq_mask)) return 0; + if (size < (int)hdata->f30.report_size) { + hid_warn(hdev, "Click Button pressed, but the click data is missing\n"); + return 0; + } + for (i = 0; i < hdata->gpio_led_count; i++) { if (test_bit(i, &hdata->button_mask)) { value = (data[i / 8] >> (i & 0x07)) & BIT(0); @@ -412,9 +424,29 @@ static int rmi_read_data_event(struct hid_device *hdev, u8 *data, int size) return 1; } +static int rmi_check_sanity(struct hid_device *hdev, u8 *data, int size) +{ + int valid_size = size; + /* + * On the Dell XPS 13 9333, the bus sometimes get confused and fills + * the report with a sentinel value "ff". Synaptics told us that such + * behavior does not comes from the touchpad itself, so we filter out + * such reports here. + */ + + while ((data[valid_size - 1] == 0xff) && valid_size > 0) + valid_size--; + + return valid_size; +} + static int rmi_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { + size = rmi_check_sanity(hdev, data, size); + if (size < 2) + return 0; + switch (data[0]) { case RMI_READ_DATA_REPORT_ID: return rmi_read_data_event(hdev, data, size); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index f0db7eca9023..129fd330dd27 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -23,13 +23,13 @@ #define WAC_CMD_ICON_BT_XFER 0x26 #define WAC_CMD_RETRIES 10 -static int wacom_get_report(struct hid_device *hdev, u8 type, u8 id, - void *buf, size_t size, unsigned int retries) +static int wacom_get_report(struct hid_device *hdev, u8 type, u8 *buf, + size_t size, unsigned int retries) { int retval; do { - retval = hid_hw_raw_request(hdev, id, buf, size, type, + retval = hid_hw_raw_request(hdev, buf[0], buf, size, type, HID_REQ_GET_REPORT); } while ((retval == -ETIMEDOUT || retval == -EPIPE) && --retries); @@ -106,12 +106,24 @@ static void wacom_feature_mapping(struct hid_device *hdev, { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; + u8 *data; + int ret; switch (usage->hid) { case HID_DG_CONTACTMAX: /* leave touch_max as is if predefined */ - if (!features->touch_max) - features->touch_max = field->value[0]; + if (!features->touch_max) { + /* read manually */ + data = kzalloc(2, GFP_KERNEL); + if (!data) + break; + data[0] = field->report->id; + ret = wacom_get_report(hdev, HID_FEATURE_REPORT, + data, 2, 0); + if (ret == 2) + features->touch_max = data[1]; + kfree(data); + } break; } } @@ -255,7 +267,7 @@ static int wacom_set_device_mode(struct hid_device *hdev, int report_id, length, 1); if (error >= 0) error = wacom_get_report(hdev, HID_FEATURE_REPORT, - report_id, rep_data, length, 1); + rep_data, length, 1); } while ((error < 0 || rep_data[1] != mode) && limit++ < WAC_MSG_RETRIES); kfree(rep_data); @@ -1245,6 +1257,8 @@ static int wacom_probe(struct hid_device *hdev, if (!id->driver_data) return -EINVAL; + hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS; + wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL); if (!wacom) return -ENOMEM; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 531a593912ec..19bad59073e6 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -165,8 +165,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, ret = vmbus_post_msg(open_msg, sizeof(struct vmbus_channel_open_channel)); - if (ret != 0) + if (ret != 0) { + err = ret; goto error1; + } t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ); if (t == 0) { @@ -363,7 +365,6 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, u32 next_gpadl_handle; unsigned long flags; int ret = 0; - int t; next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle); atomic_inc(&vmbus_connection.next_gpadl_handle); @@ -410,9 +411,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } } - t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); - BUG_ON(t == 0); - + wait_for_completion(&msginfo->waitevent); /* At this point, we received the gpadl created msg */ *gpadl_handle = gpadlmsg->gpadl; @@ -435,7 +434,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) struct vmbus_channel_gpadl_teardown *msg; struct vmbus_channel_msginfo *info; unsigned long flags; - int ret, t; + int ret; info = kmalloc(sizeof(*info) + sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL); @@ -457,11 +456,12 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown)); - BUG_ON(ret != 0); - t = wait_for_completion_timeout(&info->waitevent, 5*HZ); - BUG_ON(t == 0); + if (ret) + goto post_msg_err; + + wait_for_completion(&info->waitevent); - /* Received a torndown response */ +post_msg_err: spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); @@ -478,7 +478,7 @@ static void reset_channel_cb(void *arg) channel->onchannel_callback = NULL; } -static void vmbus_close_internal(struct vmbus_channel *channel) +static int vmbus_close_internal(struct vmbus_channel *channel) { struct vmbus_channel_close_channel *msg; int ret; @@ -501,11 +501,28 @@ static void vmbus_close_internal(struct vmbus_channel *channel) ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel)); - BUG_ON(ret != 0); + if (ret) { + pr_err("Close failed: close post msg return is %d\n", ret); + /* + * If we failed to post the close msg, + * it is perhaps better to leak memory. + */ + return ret; + } + /* Tear down the gpadl for the channel's ring buffer */ - if (channel->ringbuffer_gpadlhandle) - vmbus_teardown_gpadl(channel, - channel->ringbuffer_gpadlhandle); + if (channel->ringbuffer_gpadlhandle) { + ret = vmbus_teardown_gpadl(channel, + channel->ringbuffer_gpadlhandle); + if (ret) { + pr_err("Close failed: teardown gpadl return %d\n", ret); + /* + * If we failed to teardown gpadl, + * it is perhaps better to leak memory. + */ + return ret; + } + } /* Cleanup the ring buffers for this channel */ hv_ringbuffer_cleanup(&channel->outbound); @@ -514,7 +531,7 @@ static void vmbus_close_internal(struct vmbus_channel *channel) free_pages((unsigned long)channel->ringbuffer_pages, get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); - + return ret; } /* diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index ae22e3c1fc4c..e206619b946e 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -427,10 +427,21 @@ int vmbus_post_msg(void *buffer, size_t buflen) * insufficient resources. Retry the operation a couple of * times before giving up. */ - while (retries < 3) { - ret = hv_post_message(conn_id, 1, buffer, buflen); - if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) + while (retries < 10) { + ret = hv_post_message(conn_id, 1, buffer, buflen); + + switch (ret) { + case HV_STATUS_INSUFFICIENT_BUFFERS: + ret = -ENOMEM; + case -ENOMEM: + break; + case HV_STATUS_SUCCESS: return ret; + default: + pr_err("hv_post_msg() failed; error code:%d\n", ret); + return -EINVAL; + } + retries++; msleep(100); } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index edfc8488cb03..3e4235c7a47f 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -138,6 +138,8 @@ int hv_init(void) memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); memset(hv_context.synic_message_page, 0, sizeof(void *) * NR_CPUS); + memset(hv_context.post_msg_page, 0, + sizeof(void *) * NR_CPUS); memset(hv_context.vp_index, 0, sizeof(int) * NR_CPUS); memset(hv_context.event_dpc, 0, @@ -217,26 +219,18 @@ int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, void *payload, size_t payload_size) { - struct aligned_input { - u64 alignment8; - struct hv_input_post_message msg; - }; struct hv_input_post_message *aligned_msg; u16 status; - unsigned long addr; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return -EMSGSIZE; - addr = (unsigned long)kmalloc(sizeof(struct aligned_input), GFP_ATOMIC); - if (!addr) - return -ENOMEM; - aligned_msg = (struct hv_input_post_message *) - (ALIGN(addr, HV_HYPERCALL_PARAM_ALIGN)); + hv_context.post_msg_page[get_cpu()]; aligned_msg->connectionid = connection_id; + aligned_msg->reserved = 0; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void *)aligned_msg->payload, payload, payload_size); @@ -244,8 +238,7 @@ int hv_post_message(union hv_connection_id connection_id, status = do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL) & 0xFFFF; - kfree((void *)addr); - + put_cpu(); return status; } @@ -294,6 +287,14 @@ int hv_synic_alloc(void) pr_err("Unable to allocate SYNIC event page\n"); goto err; } + + hv_context.post_msg_page[cpu] = + (void *)get_zeroed_page(GFP_ATOMIC); + + if (hv_context.post_msg_page[cpu] == NULL) { + pr_err("Unable to allocate post msg page\n"); + goto err; + } } return 0; @@ -308,6 +309,8 @@ static void hv_synic_free_cpu(int cpu) free_page((unsigned long)hv_context.synic_event_page[cpu]); if (hv_context.synic_message_page[cpu]) free_page((unsigned long)hv_context.synic_message_page[cpu]); + if (hv_context.post_msg_page[cpu]) + free_page((unsigned long)hv_context.post_msg_page[cpu]); } void hv_synic_free(void) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 22b750749a39..c386d8dc7223 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -515,6 +515,10 @@ struct hv_context { * per-cpu list of the channels based on their CPU affinity. */ struct list_head percpu_list[NR_CPUS]; + /* + * buffer to post messages to the host. + */ + void *post_msg_page[NR_CPUS]; }; extern struct hv_context hv_context; diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 787933d43d32..613231c16194 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1419,6 +1419,11 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_mptspi_probe; } + /* VMWare emulation doesn't properly implement WRITE_SAME + */ + if (pdev->subsystem_vendor == 0x15AD) + sh->no_write_same = 1; + spin_lock_irqsave(&ioc->FreeQlock, flags); /* Attach the SCSI Host to the IOC structure diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 0e993ef28b94..8fd9466266b6 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -70,7 +70,7 @@ static int mei_cl_device_probe(struct device *dev) dev_dbg(dev, "Device probe\n"); - strncpy(id.name, dev_name(dev), sizeof(id.name)); + strlcpy(id.name, dev_name(dev), sizeof(id.name)); return driver->probe(device, &id); } diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 00fb8badbacc..3b3e91057a4c 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -1004,9 +1004,11 @@ static bool ar5008_hw_ani_control_new(struct ath_hw *ah, case ATH9K_ANI_FIRSTEP_LEVEL:{ u32 level = param; - value = level; + value = level * 2; REG_RMW_FIELD(ah, AR_PHY_FIND_SIG, AR_PHY_FIND_SIG_FIRSTEP, value); + REG_RMW_FIELD(ah, AR_PHY_FIND_SIG_LOW, + AR_PHY_FIND_SIG_FIRSTEP_LOW, value); if (level != aniState->firstepLevel) { ath_dbg(common, ANI, diff --git a/drivers/net/wireless/iwlwifi/mvm/constants.h b/drivers/net/wireless/iwlwifi/mvm/constants.h index ca79f7160573..72da88d879c7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/constants.h +++ b/drivers/net/wireless/iwlwifi/mvm/constants.h @@ -82,7 +82,7 @@ #define IWL_MVM_BT_COEX_EN_RED_TXP_THRESH 62 #define IWL_MVM_BT_COEX_DIS_RED_TXP_THRESH 65 #define IWL_MVM_BT_COEX_SYNC2SCO 1 -#define IWL_MVM_BT_COEX_CORUNNING 1 +#define IWL_MVM_BT_COEX_CORUNNING 0 #define IWL_MVM_BT_COEX_MPLUT 1 #endif /* __MVM_CONSTANTS_H */ diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 073a68b97a72..bc6a5db283f0 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -273,6 +273,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x08B1, 0x4070, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4072, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4170, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0x4C60, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0x4C70, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4060, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x406A, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4160, iwl7260_2n_cfg)}, @@ -316,6 +318,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x08B1, 0xC770, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0xC760, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC270, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0xCC70, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0xCC60, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC272, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC260, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC26A, iwl7260_n_cfg)}, diff --git a/drivers/net/wireless/rt2x00/rt2800.h b/drivers/net/wireless/rt2x00/rt2800.h index a394a9a95919..7cf6081a05a1 100644 --- a/drivers/net/wireless/rt2x00/rt2800.h +++ b/drivers/net/wireless/rt2x00/rt2800.h @@ -2039,7 +2039,7 @@ struct mac_iveiv_entry { * 2 - drop tx power by 12dBm, * 3 - increase tx power by 6dBm */ -#define BBP1_TX_POWER_CTRL FIELD8(0x07) +#define BBP1_TX_POWER_CTRL FIELD8(0x03) #define BBP1_TX_ANTENNA FIELD8(0x18) /* diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c index a8c6f1a92e0f..b1315e197ffb 100644 --- a/drivers/pci/host/pci-mvebu.c +++ b/drivers/pci/host/pci-mvebu.c @@ -873,7 +873,7 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn, rangesz = pna + na + ns; nranges = rlen / sizeof(__be32) / rangesz; - for (i = 0; i < nranges; i++) { + for (i = 0; i < nranges; i++, range += rangesz) { u32 flags = of_read_number(range, 1); u32 slot = of_read_number(range + 1, 1); u64 cpuaddr = of_read_number(range + na, pna); @@ -883,14 +883,14 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn, rtype = IORESOURCE_IO; else if (DT_FLAGS_TO_TYPE(flags) == DT_TYPE_MEM32) rtype = IORESOURCE_MEM; + else + continue; if (slot == PCI_SLOT(devfn) && type == rtype) { *tgt = DT_CPUADDR_TO_TARGET(cpuaddr); *attr = DT_CPUADDR_TO_ATTR(cpuaddr); return 0; } - - range += rangesz; } return -ENOENT; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9ff0a901ecf7..76ef7914c9aa 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -177,7 +177,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, { struct pci_dev *pci_dev = to_pci_dev(dev); - return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x\n", + return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n", pci_dev->vendor, pci_dev->device, pci_dev->subsystem_vendor, pci_dev->subsystem_device, (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8), diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 80c2d014283d..feaa5c23e991 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* isa_dma_bridge_buggy */ #include "pci.h" @@ -287,6 +288,25 @@ static void quirk_citrine(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CITRINE, quirk_citrine); +/* On IBM Crocodile ipr SAS adapters, expand BAR to system page size */ +static void quirk_extend_bar_to_page(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < PCI_STD_RESOURCE_END; i++) { + struct resource *r = &dev->resource[i]; + + if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) { + r->end = PAGE_SIZE - 1; + r->start = 0; + r->flags |= IORESOURCE_UNSET; + dev_info(&dev->dev, "expanded BAR %d to page size: %pR\n", + i, r); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, 0x034a, quirk_extend_bar_to_page); + /* * S3 868 and 968 chips report region size equal to 32M, but they decode 64M. * If it's needed, re-allocate the region. diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 6373985ad3f7..0482235eee92 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1652,7 +1652,7 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) struct pci_dev_resource *fail_res; int retval; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | - IORESOURCE_PREFETCH; + IORESOURCE_PREFETCH | IORESOURCE_MEM_64; again: __pci_bus_size_bridges(parent, &add_list); diff --git a/drivers/regulator/ltc3589.c b/drivers/regulator/ltc3589.c index c756955bfcc5..0ce8e4e0fa73 100644 --- a/drivers/regulator/ltc3589.c +++ b/drivers/regulator/ltc3589.c @@ -372,6 +372,7 @@ static bool ltc3589_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case LTC3589_IRQSTAT: case LTC3589_PGSTAT: + case LTC3589_VCCR: return true; } return false; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index b0e4a3eb33c7..5b2e76159b41 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -856,7 +856,7 @@ static void __exit cmos_do_remove(struct device *dev) cmos->dev = NULL; } -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static int cmos_suspend(struct device *dev) { @@ -907,6 +907,8 @@ static inline int cmos_poweroff(struct device *dev) return cmos_suspend(dev); } +#ifdef CONFIG_PM_SLEEP + static int cmos_resume(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); @@ -954,6 +956,7 @@ static int cmos_resume(struct device *dev) return 0; } +#endif #else static inline int cmos_poweroff(struct device *dev) diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c index 665afcb74a56..3f3544f62259 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.c +++ b/drivers/scsi/be2iscsi/be_mgmt.c @@ -943,17 +943,20 @@ mgmt_static_ip_modify(struct beiscsi_hba *phba, if (ip_action == IP_ACTION_ADD) { memcpy(req->ip_params.ip_record.ip_addr.addr, ip_param->value, - ip_param->len); + sizeof(req->ip_params.ip_record.ip_addr.addr)); if (subnet_param) memcpy(req->ip_params.ip_record.ip_addr.subnet_mask, - subnet_param->value, subnet_param->len); + subnet_param->value, + sizeof(req->ip_params.ip_record.ip_addr.subnet_mask)); } else { memcpy(req->ip_params.ip_record.ip_addr.addr, - if_info->ip_addr.addr, ip_param->len); + if_info->ip_addr.addr, + sizeof(req->ip_params.ip_record.ip_addr.addr)); memcpy(req->ip_params.ip_record.ip_addr.subnet_mask, - if_info->ip_addr.subnet_mask, ip_param->len); + if_info->ip_addr.subnet_mask, + sizeof(req->ip_params.ip_record.ip_addr.subnet_mask)); } rc = mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0); @@ -981,7 +984,7 @@ static int mgmt_modify_gateway(struct beiscsi_hba *phba, uint8_t *gt_addr, req->action = gtway_action; req->ip_addr.ip_type = BE2_IPV4; - memcpy(req->ip_addr.addr, gt_addr, param_len); + memcpy(req->ip_addr.addr, gt_addr, sizeof(req->ip_addr.addr)); return mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index be9698d920c2..8252c0e6682c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3119,10 +3119,8 @@ qla2x00_unmap_iobases(struct qla_hw_data *ha) } static void -qla2x00_clear_drv_active(scsi_qla_host_t *vha) +qla2x00_clear_drv_active(struct qla_hw_data *ha) { - struct qla_hw_data *ha = vha->hw; - if (IS_QLA8044(ha)) { qla8044_idc_lock(ha); qla8044_clear_drv_active(ha); @@ -3193,7 +3191,7 @@ qla2x00_remove_one(struct pci_dev *pdev) scsi_host_put(base_vha->host); - qla2x00_clear_drv_active(base_vha); + qla2x00_clear_drv_active(ha); qla2x00_unmap_iobases(ha); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e632e14180cf..bcc449a0c3a7 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1431,12 +1431,10 @@ static inline void qlt_unmap_sg(struct scsi_qla_host *vha, static int qlt_check_reserve_free_req(struct scsi_qla_host *vha, uint32_t req_cnt) { - struct qla_hw_data *ha = vha->hw; - device_reg_t __iomem *reg = ha->iobase; uint32_t cnt; if (vha->req->cnt < (req_cnt + 2)) { - cnt = (uint16_t)RD_REG_DWORD(®->isp24.req_q_out); + cnt = (uint16_t)RD_REG_DWORD(vha->req->req_q_out); ql_dbg(ql_dbg_tgt, vha, 0xe00a, "Request ring circled: cnt=%d, vha->->ring_index=%d, " @@ -3277,6 +3275,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, return -ENOMEM; memcpy(&op->atio, atio, sizeof(*atio)); + op->vha = vha; INIT_WORK(&op->work, qlt_create_sess_from_atio); queue_work(qla_tgt_wq, &op->work); return 0; diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index 6d207afec8cb..a4c45ea8f688 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -89,7 +89,13 @@ err_exit: static void mid_spi_dma_exit(struct dw_spi *dws) { + if (!dws->dma_inited) + return; + + dmaengine_terminate_all(dws->txchan); dma_release_channel(dws->txchan); + + dmaengine_terminate_all(dws->rxchan); dma_release_channel(dws->rxchan); } @@ -136,7 +142,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) txconf.dst_addr = dws->dma_addr; txconf.dst_maxburst = LNW_DMA_MSIZE_16; txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + txconf.dst_addr_width = dws->dma_width; txconf.device_fc = false; txchan->device->device_control(txchan, DMA_SLAVE_CONFIG, @@ -159,7 +165,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) rxconf.src_addr = dws->dma_addr; rxconf.src_maxburst = LNW_DMA_MSIZE_16; rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + rxconf.src_addr_width = dws->dma_width; rxconf.device_fc = false; rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG, diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 3afc266b666d..f96ea8a38d64 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -415,7 +415,7 @@ static void rockchip_spi_dma_txcb(void *data) spin_unlock_irqrestore(&rs->lock, flags); } -static int rockchip_spi_dma_transfer(struct rockchip_spi *rs) +static void rockchip_spi_prepare_dma(struct rockchip_spi *rs) { unsigned long flags; struct dma_slave_config rxconf, txconf; @@ -474,8 +474,6 @@ static int rockchip_spi_dma_transfer(struct rockchip_spi *rs) dmaengine_submit(txdesc); dma_async_issue_pending(rs->dma_tx.ch); } - - return 1; } static void rockchip_spi_config(struct rockchip_spi *rs) @@ -557,16 +555,17 @@ static int rockchip_spi_transfer_one( else if (rs->rx) rs->tmode = CR0_XFM_RO; - if (master->can_dma && master->can_dma(master, spi, xfer)) + /* we need prepare dma before spi was enabled */ + if (master->can_dma && master->can_dma(master, spi, xfer)) { rs->use_dma = 1; - else + rockchip_spi_prepare_dma(rs); + } else { rs->use_dma = 0; + } rockchip_spi_config(rs); - if (rs->use_dma) - ret = rockchip_spi_dma_transfer(rs); - else + if (!rs->use_dma) ret = rockchip_spi_pio_transfer(rs); return ret; diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index d017cec8a34a..e454b7c2ecd9 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -254,8 +254,16 @@ serial_omap_baud_is_mode16(struct uart_port *port, unsigned int baud) { unsigned int n13 = port->uartclk / (13 * baud); unsigned int n16 = port->uartclk / (16 * baud); - int baudAbsDiff13 = baud - (port->uartclk / (13 * n13)); - int baudAbsDiff16 = baud - (port->uartclk / (16 * n16)); + int baudAbsDiff13; + int baudAbsDiff16; + + if (n13 == 0) + n13 = 1; + if (n16 == 0) + n16 = 1; + + baudAbsDiff13 = baud - (port->uartclk / (13 * n13)); + baudAbsDiff16 = baud - (port->uartclk / (16 * n16)); if (baudAbsDiff13 < 0) baudAbsDiff13 = -baudAbsDiff13; if (baudAbsDiff16 < 0) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f7825332a325..9558da3f06a0 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -876,15 +876,11 @@ static void vfio_pci_remove(struct pci_dev *pdev) { struct vfio_pci_device *vdev; - mutex_lock(&driver_lock); - vdev = vfio_del_group_dev(&pdev->dev); if (vdev) { iommu_group_put(pdev->dev.iommu_group); kfree(vdev); } - - mutex_unlock(&driver_lock); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -927,108 +923,90 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -/* - * Test whether a reset is necessary and possible. We mark devices as - * needs_reset when they are released, but don't have a function-local reset - * available. If any of these exist in the affected devices, we want to do - * a bus/slot reset. We also need all of the affected devices to be unused, - * so we abort if any device has a non-zero refcnt. driver_lock prevents a - * device from being opened during the scan or unbound from vfio-pci. - */ -static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) -{ - bool *needs_reset = data; - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - int ret = -EBUSY; - - if (pci_drv == &vfio_pci_driver) { - struct vfio_device *device; - struct vfio_pci_device *vdev; - - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return ret; - - vdev = vfio_device_data(device); - if (vdev) { - if (vdev->needs_reset) - *needs_reset = true; - - if (!vdev->refcnt) - ret = 0; - } - - vfio_device_put(device); - } - - /* - * TODO: vfio-core considers groups to be viable even if some devices - * are attached to known drivers, like pci-stub or pcieport. We can't - * freeze devices from being unbound to those drivers like we can - * here though, so it would be racy to test for them. We also can't - * use device_lock() to prevent changes as that would interfere with - * PCI-core taking device_lock during bus reset. For now, we require - * devices to be bound to vfio-pci to get a bus/slot reset on release. - */ - - return ret; -} +struct vfio_devices { + struct vfio_device **devices; + int cur_index; + int max_index; +}; -/* Clear needs_reset on all affected devices after successful bus/slot reset */ -static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) +static int vfio_pci_get_devs(struct pci_dev *pdev, void *data) { + struct vfio_devices *devs = data; struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - if (pci_drv == &vfio_pci_driver) { - struct vfio_device *device; - struct vfio_pci_device *vdev; + if (pci_drv != &vfio_pci_driver) + return -EBUSY; - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return 0; + if (devs->cur_index == devs->max_index) + return -ENOSPC; - vdev = vfio_device_data(device); - if (vdev) - vdev->needs_reset = false; - - vfio_device_put(device); - } + devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev); + if (!devs->devices[devs->cur_index]) + return -EINVAL; + devs->cur_index++; return 0; } /* * Attempt to do a bus/slot reset if there are devices affected by a reset for * this device that are needs_reset and all of the affected devices are unused - * (!refcnt). Callers of this function are required to hold driver_lock such - * that devices can not be unbound from vfio-pci or opened by a user while we - * test for and perform a bus/slot reset. + * (!refcnt). Callers are required to hold driver_lock when calling this to + * prevent device opens and concurrent bus reset attempts. We prevent device + * unbinds by acquiring and holding a reference to the vfio_device. + * + * NB: vfio-core considers a group to be viable even if some devices are + * bound to drivers like pci-stub or pcieport. Here we require all devices + * to be bound to vfio_pci since that's the only way we can be sure they + * stay put. */ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) { + struct vfio_devices devs = { .cur_index = 0 }; + int i = 0, ret = -EINVAL; bool needs_reset = false, slot = false; - int ret; + struct vfio_pci_device *tmp; if (!pci_probe_reset_slot(vdev->pdev->slot)) slot = true; else if (pci_probe_reset_bus(vdev->pdev->bus)) return; - if (vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_test_bus_reset, - &needs_reset, slot) || !needs_reset) + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs, + &i, slot) || !i) return; - if (slot) - ret = pci_try_reset_slot(vdev->pdev->slot); - else - ret = pci_try_reset_bus(vdev->pdev->bus); - - if (ret) + devs.max_index = i; + devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL); + if (!devs.devices) return; - vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_clear_needs_reset, NULL, slot); + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_get_devs, &devs, slot)) + goto put_devs; + + for (i = 0; i < devs.cur_index; i++) { + tmp = vfio_device_data(devs.devices[i]); + if (tmp->needs_reset) + needs_reset = true; + if (tmp->refcnt) + goto put_devs; + } + + if (needs_reset) + ret = slot ? pci_try_reset_slot(vdev->pdev->slot) : + pci_try_reset_bus(vdev->pdev->bus); + +put_devs: + for (i = 0; i < devs.cur_index; i++) { + if (!ret) { + tmp = vfio_device_data(devs.devices[i]); + tmp->needs_reset = false; + } + vfio_device_put(devs.devices[i]); + } + + kfree(devs.devices); } static void __exit vfio_pci_cleanup(void) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 25ebe8eecdb7..c3eb93fc9261 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -163,8 +163,8 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) /* Find pfns pointing at start of each page, get pages and free them. */ for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { struct page *page = balloon_pfn_to_page(pfns[i]); - balloon_page_free(page); adjust_managed_page_count(page, 1); + put_page(page); /* balloon reference */ } } @@ -395,6 +395,8 @@ static int virtballoon_migratepage(struct address_space *mapping, if (!mutex_trylock(&vb->balloon_lock)) return -EAGAIN; + get_page(newpage); /* balloon reference */ + /* balloon's page migration 1st step -- inflate "newpage" */ spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_insert(newpage, mapping, &vb_dev_info->pages); @@ -404,12 +406,7 @@ static int virtballoon_migratepage(struct address_space *mapping, set_page_pfns(vb->pfns, newpage); tell_host(vb, vb->inflate_vq); - /* - * balloon's page migration 2nd step -- deflate "page" - * - * It's safe to delete page->lru here because this page is at - * an isolated migration list, and this step is expected to happen here - */ + /* balloon's page migration 2nd step -- deflate "page" */ balloon_page_delete(page); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb->pfns, page); @@ -417,7 +414,9 @@ static int virtballoon_migratepage(struct address_space *mapping, mutex_unlock(&vb->balloon_lock); - return MIGRATEPAGE_BALLOON_SUCCESS; + put_page(page); /* balloon reference */ + + return MIGRATEPAGE_SUCCESS; } /* define the balloon_mapping->a_ops callback to allow balloon page migration */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eea26e1b2fda..d738ff8ab81c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -567,6 +567,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_kobj_rm_device(fs_info, src_device); btrfs_kobj_add_device(fs_info, tgt_device); + btrfs_dev_replace_unlock(dev_replace); + btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_srcdev(fs_info, src_device); @@ -580,7 +582,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * superblock is scratched out so that it is no longer marked to * belong to this filesystem. */ - btrfs_dev_replace_unlock(dev_replace); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->chunk_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3efe1c3877bf..98042c1a48b4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4502,7 +4502,13 @@ again: space_info->flush = 1; } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { used += orig_bytes; - if (need_do_async_reclaim(space_info, root->fs_info, used) && + /* + * We will do the space reservation dance during log replay, + * which means we won't have fs_info->fs_root set, so don't do + * the async reclaim as we will panic. + */ + if (!root->fs_info->log_root_recovering && + need_do_async_reclaim(space_info, root->fs_info, used) && !work_busy(&root->fs_info->async_reclaim_work)) queue_work(system_unbound_wq, &root->fs_info->async_reclaim_work); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ff1cc0399b9a..68dd92cd7d54 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2621,23 +2621,28 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map *em = NULL; struct extent_state *cached_state = NULL; - u64 lockstart = *offset; - u64 lockend = i_size_read(inode); - u64 start = *offset; - u64 len = i_size_read(inode); + u64 lockstart; + u64 lockend; + u64 start; + u64 len; int ret = 0; - lockend = max_t(u64, root->sectorsize, lockend); + if (inode->i_size == 0) + return -ENXIO; + + /* + * *offset can be negative, in this case we start finding DATA/HOLE from + * the very start of the file. + */ + start = max_t(loff_t, 0, *offset); + + lockstart = round_down(start, root->sectorsize); + lockend = round_up(i_size_read(inode), root->sectorsize); if (lockend <= lockstart) lockend = lockstart + root->sectorsize; - lockend--; len = lockend - lockstart + 1; - len = max_t(u64, len, root->sectorsize); - if (inode->i_size == 0) - return -ENXIO; - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, &cached_state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 016c403bfe7e..886d8d42640d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3662,7 +3662,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * without delay */ if (!btrfs_is_free_space_inode(inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID + && !root->fs_info->log_root_recovering) { btrfs_update_root_times(trans, root); ret = btrfs_delayed_update_inode(trans, root, inode); @@ -5202,42 +5203,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) iput(inode); inode = ERR_PTR(ret); } - /* - * If orphan cleanup did remove any orphans, it means the tree - * was modified and therefore the commit root is not the same as - * the current root anymore. This is a problem, because send - * uses the commit root and therefore can see inode items that - * don't exist in the current root anymore, and for example make - * calls to btrfs_iget, which will do tree lookups based on the - * current root and not on the commit root. Those lookups will - * fail, returning a -ESTALE error, and making send fail with - * that error. So make sure a send does not see any orphans we - * have just removed, and that it will see the same inodes - * regardless of whether a transaction commit happened before - * it started (meaning that the commit root will be the same as - * the current root) or not. - */ - if (sub_root->node != sub_root->commit_root) { - u64 sub_flags = btrfs_root_flags(&sub_root->root_item); - - if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { - struct extent_buffer *eb; - - /* - * Assert we can't have races between dentry - * lookup called through the snapshot creation - * ioctl and the VFS. - */ - ASSERT(mutex_is_locked(&dir->i_mutex)); - - down_write(&root->fs_info->commit_root_sem); - eb = sub_root->commit_root; - sub_root->commit_root = - btrfs_root_node(sub_root); - up_write(&root->fs_info->commit_root_sem); - free_extent_buffer(eb); - } - } } return inode; @@ -6191,21 +6156,60 @@ out_fail_inode: goto out_fail; } +/* Find next extent map of a given extent map, caller needs to ensure locks */ +static struct extent_map *next_extent_map(struct extent_map *em) +{ + struct rb_node *next; + + next = rb_next(&em->rb_node); + if (!next) + return NULL; + return container_of(next, struct extent_map, rb_node); +} + +static struct extent_map *prev_extent_map(struct extent_map *em) +{ + struct rb_node *prev; + + prev = rb_prev(&em->rb_node); + if (!prev) + return NULL; + return container_of(prev, struct extent_map, rb_node); +} + /* helper for btfs_get_extent. Given an existing extent in the tree, + * the existing extent is the nearest extent to map_start, * and an extent that you want to insert, deal with overlap and insert - * the new extent into the tree. + * the best fitted new extent into the tree. */ static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, u64 map_start) { + struct extent_map *prev; + struct extent_map *next; + u64 start; + u64 end; u64 start_diff; BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); - start_diff = map_start - em->start; - em->start = map_start; - em->len = existing->start - em->start; + + if (existing->start > map_start) { + next = existing; + prev = prev_extent_map(next); + } else { + prev = existing; + next = next_extent_map(prev); + } + + start = prev ? extent_map_end(prev) : em->start; + start = max_t(u64, start, em->start); + end = next ? next->start : extent_map_end(em); + end = min_t(u64, end, extent_map_end(em)); + start_diff = start - em->start; + em->start = start; + em->len = end - start; if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; @@ -6482,25 +6486,21 @@ insert: ret = 0; - existing = lookup_extent_mapping(em_tree, start, len); - if (existing && (existing->start > start || - existing->start + existing->len <= start)) { + existing = search_extent_mapping(em_tree, start, len); + /* + * existing will always be non-NULL, since there must be + * extent causing the -EEXIST. + */ + if (start >= extent_map_end(existing) || + start <= existing->start) { + /* + * The existing extent map is the one nearest to + * the [start, start + len) range which overlaps + */ + err = merge_extent_mapping(em_tree, existing, + em, start); free_extent_map(existing); - existing = NULL; - } - if (!existing) { - existing = lookup_extent_mapping(em_tree, em->start, - em->len); - if (existing) { - err = merge_extent_mapping(em_tree, existing, - em, start); - free_extent_map(existing); - if (err) { - free_extent_map(em); - em = NULL; - } - } else { - err = -EIO; + if (err) { free_extent_map(em); em = NULL; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8a8e29878c34..b765d412cbb6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -332,6 +332,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) goto out_drop; } else { + ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); + if (ret && ret != -ENODATA) + goto out_drop; ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } @@ -711,6 +714,39 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto fail; + ret = btrfs_orphan_cleanup(pending_snapshot->snap); + if (ret) + goto fail; + + /* + * If orphan cleanup did remove any orphans, it means the tree was + * modified and therefore the commit root is not the same as the + * current root anymore. This is a problem, because send uses the + * commit root and therefore can see inode items that don't exist + * in the current root anymore, and for example make calls to + * btrfs_iget, which will do tree lookups based on the current root + * and not on the commit root. Those lookups will fail, returning a + * -ESTALE error, and making send fail with that error. So make sure + * a send does not see any orphans we have just removed, and that it + * will see the same inodes regardless of whether a transaction + * commit happened before it started (meaning that the commit root + * will be the same as the current root) or not. + */ + if (readonly && pending_snapshot->snap->node != + pending_snapshot->snap->commit_root) { + trans = btrfs_join_transaction(pending_snapshot->snap); + if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { + ret = PTR_ERR(trans); + goto fail; + } + if (!IS_ERR(trans)) { + ret = btrfs_commit_transaction(trans, + pending_snapshot->snap); + if (ret) + goto fail; + } + } + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); @@ -5283,6 +5319,12 @@ long btrfs_ioctl(struct file *file, unsigned int if (ret) return ret; ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); + /* + * The transaction thread may want to do more work, + * namely it pokes the cleaner ktread that will start + * processing uncleaned subvols. + */ + wake_up_process(root->fs_info->transaction_kthread); return ret; } case BTRFS_IOC_START_SYNC: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ded5c601d916..d094534c3b53 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -551,9 +551,15 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, key.type = BTRFS_QGROUP_INFO_KEY; key.offset = qgroupid; + /* + * Avoid a transaction abort by catching -EEXIST here. In that + * case, we proceed by re-initializing the existing structure + * on disk. + */ + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_info)); - if (ret) + if (ret && ret != -EEXIST) goto out; leaf = path->nodes[0]; @@ -572,7 +578,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, key.type = BTRFS_QGROUP_LIMIT_KEY; ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_limit)); - if (ret) + if (ret && ret != -EEXIST) goto out; leaf = path->nodes[0]; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 65245a07275b..56fe6ec409ac 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -736,7 +736,8 @@ again: err = ret; goto out; } - BUG_ON(!ret || !path1->slots[0]); + ASSERT(ret); + ASSERT(path1->slots[0]); path1->slots[0]--; @@ -746,10 +747,10 @@ again: * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ - BUG_ON(!list_is_singular(&cur->upper)); + ASSERT(list_is_singular(&cur->upper)); edge = list_entry(cur->upper.next, struct backref_edge, list[LOWER]); - BUG_ON(!list_empty(&edge->list[UPPER])); + ASSERT(list_empty(&edge->list[UPPER])); exist = edge->node[UPPER]; /* * add the upper level block to pending list if we need @@ -831,7 +832,7 @@ again: cur->cowonly = 1; } #else - BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); + ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { #endif if (key.objectid == key.offset) { @@ -840,7 +841,7 @@ again: * backref of this type. */ root = find_reloc_root(rc, cur->bytenr); - BUG_ON(!root); + ASSERT(root); cur->root = root; break; } @@ -868,7 +869,7 @@ again: } else { upper = rb_entry(rb_node, struct backref_node, rb_node); - BUG_ON(!upper->checked); + ASSERT(upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); } list_add_tail(&edge->list[LOWER], &cur->upper); @@ -892,7 +893,7 @@ again: if (btrfs_root_level(&root->root_item) == cur->level) { /* tree root */ - BUG_ON(btrfs_root_bytenr(&root->root_item) != + ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); if (should_ignore_root(root)) list_add(&cur->list, &useless); @@ -927,7 +928,7 @@ again: need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { - BUG_ON(btrfs_root_bytenr(&root->root_item) != + ASSERT(btrfs_root_bytenr(&root->root_item) == lower->bytenr); if (should_ignore_root(root)) list_add(&lower->list, &useless); @@ -977,12 +978,15 @@ again: need_check = false; list_add_tail(&edge->list[UPPER], &list); - } else + } else { + if (upper->checked) + need_check = true; INIT_LIST_HEAD(&edge->list[UPPER]); + } } else { upper = rb_entry(rb_node, struct backref_node, rb_node); - BUG_ON(!upper->checked); + ASSERT(upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); if (!upper->owner) upper->owner = btrfs_header_owner(eb); @@ -1026,7 +1030,7 @@ next: * everything goes well, connect backref nodes and insert backref nodes * into the cache. */ - BUG_ON(!node->checked); + ASSERT(node->checked); cowonly = node->cowonly; if (!cowonly) { rb_node = tree_insert(&cache->rb_root, node->bytenr, @@ -1062,8 +1066,21 @@ next: continue; } - BUG_ON(!upper->checked); - BUG_ON(cowonly != upper->cowonly); + if (!upper->checked) { + /* + * Still want to blow up for developers since this is a + * logic bug. + */ + ASSERT(0); + err = -EINVAL; + goto out; + } + if (cowonly != upper->cowonly) { + ASSERT(0); + err = -EINVAL; + goto out; + } + if (!cowonly) { rb_node = tree_insert(&cache->rb_root, upper->bytenr, &upper->rb_node); @@ -1086,7 +1103,7 @@ next: while (!list_empty(&useless)) { upper = list_entry(useless.next, struct backref_node, list); list_del_init(&upper->list); - BUG_ON(!list_empty(&upper->upper)); + ASSERT(list_empty(&upper->upper)); if (upper == node) node = NULL; if (upper->lowest) { @@ -1119,29 +1136,45 @@ out: if (err) { while (!list_empty(&useless)) { lower = list_entry(useless.next, - struct backref_node, upper); - list_del_init(&lower->upper); + struct backref_node, list); + list_del_init(&lower->list); } - upper = node; - INIT_LIST_HEAD(&list); - while (upper) { - if (RB_EMPTY_NODE(&upper->rb_node)) { - list_splice_tail(&upper->upper, &list); - free_backref_node(cache, upper); - } - - if (list_empty(&list)) - break; - - edge = list_entry(list.next, struct backref_edge, - list[LOWER]); + while (!list_empty(&list)) { + edge = list_first_entry(&list, struct backref_edge, + list[UPPER]); + list_del(&edge->list[UPPER]); list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; upper = edge->node[UPPER]; free_backref_edge(cache, edge); + + /* + * Lower is no longer linked to any upper backref nodes + * and isn't in the cache, we can free it ourselves. + */ + if (list_empty(&lower->upper) && + RB_EMPTY_NODE(&lower->rb_node)) + list_add(&lower->list, &useless); + + if (!RB_EMPTY_NODE(&upper->rb_node)) + continue; + + /* Add this guy's upper edges to the list to proces */ + list_for_each_entry(edge, &upper->upper, list[LOWER]) + list_add_tail(&edge->list[UPPER], &list); + if (list_empty(&upper->upper)) + list_add(&upper->list, &useless); + } + + while (!list_empty(&useless)) { + lower = list_entry(useless.next, + struct backref_node, list); + list_del_init(&lower->list); + free_backref_node(cache, lower); } return ERR_PTR(err); } - BUG_ON(node && node->detached); + ASSERT(!node || !node->detached); return node; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d89c6d3542ca..98a25df1c430 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -609,7 +609,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) if (transid <= root->fs_info->last_trans_committed) goto out; - ret = -EINVAL; /* find specified transaction */ spin_lock(&root->fs_info->trans_lock); list_for_each_entry(t, &root->fs_info->trans_list, list) { @@ -625,9 +624,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } } spin_unlock(&root->fs_info->trans_lock); - /* The specified transaction doesn't exist */ - if (!cur_trans) + + /* + * The specified transaction doesn't exist, or we + * raced with btrfs_commit_transaction + */ + if (!cur_trans) { + if (transid > root->fs_info->last_trans_committed) + ret = -EINVAL; goto out; + } } else { /* find newest transaction that is committing | committed */ spin_lock(&root->fs_info->trans_lock); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d4a9431ec73c..57ee4c53b4f8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1039,7 +1039,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); - if (!rc) + if (!rc && dentry->d_inode) fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; diff --git a/fs/namei.c b/fs/namei.c index a7b05bf82d31..3ddb044f3702 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3074,7 +3074,7 @@ opened: error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(file, op->acc_mode); + error = ima_file_check(file, op->acc_mode, *opened); if (error) goto exit_fput; diff --git a/fs/namespace.c b/fs/namespace.c index ef42d9bee212..7f67b463a5b4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1356,6 +1356,8 @@ static int do_umount(struct mount *mnt, int flags) * Special case for "unmounting" root ... * we just try to remount it readonly. */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; down_write(&sb->s_umount); if (!(sb->s_flags & MS_RDONLY)) retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6a4f3666e273..94088517039f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1318,7 +1318,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) */ static int nfs_volume_list_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &nfs_server_list_ops, + return seq_open_net(inode, file, &nfs_volume_list_ops, sizeof(struct seq_net_private)); } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 90978075f730..f59713e091a8 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1031,7 +1031,7 @@ filelayout_clear_request_commit(struct nfs_page *req, } out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg_async(freeme); + pnfs_put_lseg_locked(freeme); } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ca0c8e7a945..0422d77b73c7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7353,7 +7353,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr int ret = 0; if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) - return 0; + return -EAGAIN; task = _nfs41_proc_sequence(clp, cred, false); if (IS_ERR(task)) ret = PTR_ERR(task); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 1720d32ffa54..e1ba58c3d1ad 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work) } nfs_expire_all_delegations(clp); } else { + int ret; + /* Queue an asynchronous RENEW. */ - ops->sched_state_renewal(clp, cred, renew_flags); + ret = ops->sched_state_renewal(clp, cred, renew_flags); put_rpccred(cred); - goto out_exp; + switch (ret) { + default: + goto out_exp; + case -EAGAIN: + case -ENOMEM: + break; + } } } else { dprintk("%s: failed to call renewd. Reason: lease not expired \n", diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 22fe35104c0c..5194933ed419 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1705,7 +1705,8 @@ restart: if (status < 0) { set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); - return nfs4_recovery_handle_error(clp, status); + status = nfs4_recovery_handle_error(clp, status); + return (status != 0) ? status : -EAGAIN; } nfs4_put_state_owner(sp); @@ -1714,7 +1715,7 @@ restart: spin_unlock(&clp->cl_lock); } rcu_read_unlock(); - return status; + return 0; } static int nfs4_check_lease(struct nfs_client *clp) @@ -1761,7 +1762,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) break; case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_CLID_INUSE: @@ -2345,6 +2345,7 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_check_lease(clp); if (status < 0) goto out_error; + continue; } if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { @@ -2366,14 +2367,11 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) - continue; - nfs4_state_end_reclaim_reboot(clp); - if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + if (status == -EAGAIN) continue; if (status < 0) goto out_error; + nfs4_state_end_reclaim_reboot(clp); } /* Now recover expired state... */ @@ -2381,9 +2379,7 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || - test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + if (status == -EAGAIN) continue; if (status < 0) goto out_error; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index be7cbce6e4c7..9229d4780f87 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -518,7 +518,8 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); */ void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - put_nfs_open_context(hdr->args.context); + if (hdr->args.context) + put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); } @@ -743,12 +744,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - if (WARN_ON_ONCE(pageused >= pagecount)) - return nfs_pgio_error(desc, hdr); - if (!last_page || last_page != req->wb_page) { - *pages++ = last_page = req->wb_page; pageused++; + if (pageused > pagecount) + break; + *pages++ = last_page = req->wb_page; } } if (WARN_ON_ONCE(pageused != pagecount)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..5480720bdc0f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,22 +361,43 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -static void pnfs_put_lseg_async_work(struct work_struct *work) +static void pnfs_free_lseg_async_work(struct work_struct *work) { struct pnfs_layout_segment *lseg; + struct pnfs_layout_hdr *lo; lseg = container_of(work, struct pnfs_layout_segment, pls_work); + lo = lseg->pls_layout; - pnfs_put_lseg(lseg); + pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } -void -pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) { - INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work); + INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); schedule_work(&lseg->pls_work); } -EXPORT_SYMBOL_GPL(pnfs_put_lseg_async); + +void +pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) +{ + if (!lseg) + return; + + assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); + + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, + atomic_read(&lseg->pls_refcount), + test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + if (atomic_dec_and_test(&lseg->pls_refcount)) { + struct pnfs_layout_hdr *lo = lseg->pls_layout; + pnfs_get_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); + pnfs_free_lseg_async(lseg); + } +} +EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); static u64 end_offset(u64 start, u64 len) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..bc2db1c2a5ee 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -183,7 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg); +void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -422,10 +422,6 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } -static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) -{ -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b01f6e100ee8..353aac85a3e3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1670,6 +1670,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) readbytes += nfsd4_max_reply(argp->rqstp, op); } else max_reply += nfsd4_max_reply(argp->rqstp, op); + /* + * OP_LOCK may return a conflicting lock. (Special case + * because it will just skip encoding this if it runs + * out of xdr buffer space, and it is the only operation + * that behaves this way.) + */ + if (op->opnum == OP_LOCK) + max_reply += NFS4_OPAQUE_LIMIT; if (op->status) { argp->opcnt = i+1; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f501a9b5c9df..6ab077bb897e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -708,7 +708,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, host_err = PTR_ERR(*filp); *filp = NULL; } else { - host_err = ima_file_check(*filp, may_flags); + host_err = ima_file_check(*filp, may_flags, 0); if (may_flags & NFSD_MAY_64BIT_COOKIE) (*filp)->f_mode |= FMODE_64BITHASH; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b13992a41bd9..c991616acca9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -78,7 +78,7 @@ static int create_fd(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - client_fd = get_unused_fd(); + client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 08598843288f..c9b4df5810d5 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1277,7 +1277,7 @@ update_time: */ #define UDF_MAX_ICB_NESTING 1024 -static int udf_read_inode(struct inode *inode) +static int udf_read_inode(struct inode *inode, bool hidden_inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; @@ -1436,8 +1436,11 @@ reread: link_count = le16_to_cpu(fe->fileLinkCount); if (!link_count) { - ret = -ESTALE; - goto out; + if (!hidden_inode) { + ret = -ESTALE; + goto out; + } + link_count = 1; } set_nlink(inode, link_count); @@ -1826,7 +1829,8 @@ out: return err; } -struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) +struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino, + bool hidden_inode) { unsigned long block = udf_get_lb_pblock(sb, ino, 0); struct inode *inode = iget_locked(sb, block); @@ -1839,7 +1843,7 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) return inode; memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); - err = udf_read_inode(inode); + err = udf_read_inode(inode, hidden_inode); if (err < 0) { iget_failed(inode); return ERR_PTR(err); diff --git a/fs/udf/super.c b/fs/udf/super.c index 5401fc33f5cc..e229315bbf7a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -959,7 +959,7 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, addr.logicalBlockNum = meta_file_loc; addr.partitionReferenceNum = partition_num; - metadata_fe = udf_iget(sb, &addr); + metadata_fe = udf_iget_special(sb, &addr); if (IS_ERR(metadata_fe)) { udf_warn(sb, "metadata inode efe not found\n"); @@ -1020,7 +1020,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); - fe = udf_iget(sb, &addr); + fe = udf_iget_special(sb, &addr); if (IS_ERR(fe)) { if (sb->s_flags & MS_RDONLY) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); @@ -1119,7 +1119,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, }; struct inode *inode; - inode = udf_iget(sb, &loc); + inode = udf_iget_special(sb, &loc); if (IS_ERR(inode)) { udf_debug("cannot load unallocSpaceTable (part %d)\n", p_index); @@ -1154,7 +1154,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, }; struct inode *inode; - inode = udf_iget(sb, &loc); + inode = udf_iget_special(sb, &loc); if (IS_ERR(inode)) { udf_debug("cannot load freedSpaceTable (part %d)\n", p_index); @@ -1198,7 +1198,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, vat_block >= map->s_partition_root && vat_block >= start_block - 3; vat_block--) { ino.logicalBlockNum = vat_block - map->s_partition_root; - inode = udf_iget(sb, &ino); + inode = udf_iget_special(sb, &ino); if (!IS_ERR(inode)) { sbi->s_vat_inode = inode; break; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 742557be9936..1cc3c993ebd0 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -138,7 +138,18 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, /* file.c */ extern long udf_ioctl(struct file *, unsigned int, unsigned long); /* inode.c */ -extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); +extern struct inode *__udf_iget(struct super_block *, struct kernel_lb_addr *, + bool hidden_inode); +static inline struct inode *udf_iget_special(struct super_block *sb, + struct kernel_lb_addr *ino) +{ + return __udf_iget(sb, ino, true); +} +static inline struct inode *udf_iget(struct super_block *sb, + struct kernel_lb_addr *ino) +{ + return __udf_iget(sb, ino, false); +} extern int udf_expand_file_adinicb(struct inode *); extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); extern struct buffer_head *udf_bread(struct inode *, int, int, int *); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index b984647c24db..2f502537a39c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -434,10 +434,22 @@ xfs_start_page_writeback( { ASSERT(PageLocked(page)); ASSERT(!PageWriteback(page)); - if (clear_dirty) + + /* + * if the page was not fully cleaned, we need to ensure that the higher + * layers come back to it correctly. That means we need to keep the page + * dirty, and for WB_SYNC_ALL writeback we need to ensure the + * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to + * write this page in this writeback sweep will be made. + */ + if (clear_dirty) { clear_page_dirty_for_io(page); - set_page_writeback(page); + set_page_writeback(page); + } else + set_page_writeback_keepwrite(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ if (!buffers) end_page_writeback(page); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f71be9c68017..f1deb961a296 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -639,7 +639,8 @@ next_ag: xfs_buf_relse(agbp); agbp = NULL; agino = 0; - } while (++agno < mp->m_sb.sb_agcount); + agno++; + } while (agno < mp->m_sb.sb_agcount); if (!error) { if (bufidx) { diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 089743ade734..38aa07d5b81c 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -27,10 +27,13 @@ * counter raised only while it is under our special handling; * * iii. after the lockless scan step have selected a potential balloon page for - * isolation, re-test the page->mapping flags and the page ref counter + * isolation, re-test the PageBalloon mark and the PagePrivate flag * under the proper page lock, to ensure isolating a valid balloon page * (not yet isolated, nor under release procedure) * + * iv. isolation or dequeueing procedure must clear PagePrivate flag under + * page lock together with removing page from balloon device page list. + * * The functions provided by this interface are placed to help on coping with * the aforementioned balloon page corner case, as well as to ensure the simple * set of exposed rules are satisfied while we are dealing with balloon pages @@ -71,28 +74,6 @@ static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info) kfree(b_dev_info); } -/* - * balloon_page_free - release a balloon page back to the page free lists - * @page: ballooned page to be set free - * - * This function must be used to properly set free an isolated/dequeued balloon - * page at the end of a sucessful page migration, or at the balloon driver's - * page release procedure. - */ -static inline void balloon_page_free(struct page *page) -{ - /* - * Balloon pages always get an extra refcount before being isolated - * and before being dequeued to help on sorting out fortuite colisions - * between a thread attempting to isolate and another thread attempting - * to release the very same balloon page. - * - * Before we handle the page back to Buddy, lets drop its extra refcnt. - */ - put_page(page); - __free_page(page); -} - #ifdef CONFIG_BALLOON_COMPACTION extern bool balloon_page_isolate(struct page *page); extern void balloon_page_putback(struct page *page); @@ -108,74 +89,33 @@ static inline void balloon_mapping_free(struct address_space *balloon_mapping) } /* - * page_flags_cleared - helper to perform balloon @page ->flags tests. - * - * As balloon pages are obtained from buddy and we do not play with page->flags - * at driver level (exception made when we get the page lock for compaction), - * we can safely identify a ballooned page by checking if the - * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also - * helps us skip ballooned pages that are locked for compaction or release, thus - * mitigating their racy check at balloon_page_movable() - */ -static inline bool page_flags_cleared(struct page *page) -{ - return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP); -} - -/* - * __is_movable_balloon_page - helper to perform @page mapping->flags tests + * __is_movable_balloon_page - helper to perform @page PageBalloon tests */ static inline bool __is_movable_balloon_page(struct page *page) { - struct address_space *mapping = page->mapping; - return mapping_balloon(mapping); + return PageBalloon(page); } /* - * balloon_page_movable - test page->mapping->flags to identify balloon pages - * that can be moved by compaction/migration. - * - * This function is used at core compaction's page isolation scheme, therefore - * most pages exposed to it are not enlisted as balloon pages and so, to avoid - * undesired side effects like racing against __free_pages(), we cannot afford - * holding the page locked while testing page->mapping->flags here. + * balloon_page_movable - test PageBalloon to identify balloon pages + * and PagePrivate to check that the page is not + * isolated and can be moved by compaction/migration. * * As we might return false positives in the case of a balloon page being just - * released under us, the page->mapping->flags need to be re-tested later, - * under the proper page lock, at the functions that will be coping with the - * balloon page case. + * released under us, this need to be re-tested later, under the page lock. */ static inline bool balloon_page_movable(struct page *page) { - /* - * Before dereferencing and testing mapping->flags, let's make sure - * this is not a page that uses ->mapping in a different way - */ - if (page_flags_cleared(page) && !page_mapped(page) && - page_count(page) == 1) - return __is_movable_balloon_page(page); - - return false; + return PageBalloon(page) && PagePrivate(page); } /* * isolated_balloon_page - identify an isolated balloon page on private * compaction/migration page lists. - * - * After a compaction thread isolates a balloon page for migration, it raises - * the page refcount to prevent concurrent compaction threads from re-isolating - * the same page. For that reason putback_movable_pages(), or other routines - * that need to identify isolated balloon pages on private pagelists, cannot - * rely on balloon_page_movable() to accomplish the task. */ static inline bool isolated_balloon_page(struct page *page) { - /* Already isolated balloon pages, by default, have a raised refcount */ - if (page_flags_cleared(page) && !page_mapped(page) && - page_count(page) >= 2) - return __is_movable_balloon_page(page); - - return false; + return PageBalloon(page); } /* @@ -192,6 +132,8 @@ static inline void balloon_page_insert(struct page *page, struct address_space *mapping, struct list_head *head) { + __SetPageBalloon(page); + SetPagePrivate(page); page->mapping = mapping; list_add(&page->lru, head); } @@ -206,8 +148,12 @@ static inline void balloon_page_insert(struct page *page, */ static inline void balloon_page_delete(struct page *page) { + __ClearPageBalloon(page); page->mapping = NULL; - list_del(&page->lru); + if (PagePrivate(page)) { + ClearPagePrivate(page); + list_del(&page->lru); + } } /* @@ -258,6 +204,11 @@ static inline void balloon_page_delete(struct page *page) list_del(&page->lru); } +static inline bool __is_movable_balloon_page(struct page *page) +{ + return false; +} + static inline bool balloon_page_movable(struct page *page) { return false; diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h new file mode 100644 index 000000000000..cdd1cc202d51 --- /dev/null +++ b/include/linux/compiler-gcc5.h @@ -0,0 +1,66 @@ +#ifndef __LINUX_COMPILER_H +#error "Please don't include directly, include instead." +#endif + +#define __used __attribute__((__used__)) +#define __must_check __attribute__((warn_unused_result)) +#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) + +/* Mark functions as cold. gcc will assume any path leading to a call + to them will be unlikely. This means a lot of manual unlikely()s + are unnecessary now for any paths leading to the usual suspects + like BUG(), printk(), panic() etc. [but let's keep them for now for + older compilers] + + Early snapshots of gcc 4.3 don't support this and we can't detect this + in the preprocessor, but we can live with this because they're unreleased. + Maketime probing would be overkill here. + + gcc also has a __attribute__((__hot__)) to move hot functions into + a special section, but I don't see any sense in this right now in + the kernel context */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ + +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 7cf5e9b32550..120ccc53fcb7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -15,7 +15,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); @@ -27,7 +27,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm) return 0; } -static inline int ima_file_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask, int opened) { return 0; } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a2901c414664..b33347f4e4b7 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -13,18 +13,9 @@ typedef void free_page_t(struct page *page, unsigned long private); * Return values from addresss_space_operations.migratepage(): * - negative errno on page migration failure; * - zero on page migration success; - * - * The balloon page migration introduces this special case where a 'distinct' - * return code is used to flag a successful page migration to unmap_and_move(). - * This approach is necessary because page migration can race against balloon - * deflation procedure, and for such case we could introduce a nasty page leak - * if a successfully migrated balloon page gets released concurrently with - * migration's unmap_and_move() wrap-up steps. */ #define MIGRATEPAGE_SUCCESS 0 -#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page - * sucessful migration case. - */ + enum migrate_reason { MR_COMPACTION, MR_MEMORY_FAILURE, diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..16e6f1effef8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -553,6 +553,25 @@ static inline void __ClearPageBuddy(struct page *page) atomic_set(&page->_mapcount, -1); } +#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) + +static inline int PageBalloon(struct page *page) +{ + return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE; +} + +static inline void __SetPageBalloon(struct page *page) +{ + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); + atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE); +} + +static inline void __ClearPageBalloon(struct page *page) +{ + VM_BUG_ON_PAGE(!PageBalloon(page), page); + atomic_set(&page->_mapcount, -1); +} + void put_page(struct page *page); void put_pages_list(struct list_head *pages); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6ed0bb73a864..4e82195b1695 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2557,6 +2557,7 @@ #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 #define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095E #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 diff --git a/include/linux/sched.h b/include/linux/sched.h index b867a4dab38a..2b1d9e974382 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1934,11 +1934,13 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */ +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ static inline gfp_t memalloc_noio_flags(gfp_t flags) { if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~__GFP_IO; + flags &= ~(__GFP_IO | __GFP_FS); return flags; } diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 78e4a86030dd..0a8e6badb29b 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -137,7 +137,7 @@ struct hv_do_fcopy { __u64 offset; __u32 size; __u8 data[DATA_FRAGMENT]; -}; +} __attribute__((packed)); /* * An implementation of HyperV key value pair (KVP) functionality for Linux. diff --git a/kernel/futex.c b/kernel/futex.c index 815d7af2ffe8..f3a3a071283c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -343,6 +343,8 @@ static void get_futex_key_refs(union futex_key *key) case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies MB (B) */ break; + default: + smp_mb(); /* explicit MB (B) */ } } diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 8563081e8da3..a1c387f6afba 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -19,31 +19,21 @@ #include #include "lzodefs.h" -#define HAVE_IP(t, x) \ - (((size_t)(ip_end - ip) >= (size_t)(t + x)) && \ - (((t + x) >= t) && ((t + x) >= x))) +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun -#define HAVE_OP(t, x) \ - (((size_t)(op_end - op) >= (size_t)(t + x)) && \ - (((t + x) >= t) && ((t + x) >= x))) - -#define NEED_IP(t, x) \ - do { \ - if (!HAVE_IP(t, x)) \ - goto input_overrun; \ - } while (0) - -#define NEED_OP(t, x) \ - do { \ - if (!HAVE_OP(t, x)) \ - goto output_overrun; \ - } while (0) - -#define TEST_LB(m_pos) \ - do { \ - if ((m_pos) < out) \ - goto lookbehind_overrun; \ - } while (0) +/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base + * count without overflowing an integer. The multiply will overflow when + * multiplying 255 by more than MAXINT/255. The sum will overflow earlier + * depending on the base count. Since the base count is taken from a u8 + * and a few bits, it is safe to assume that it will always be lower than + * or equal to 2*255, thus we can always prevent any overflow by accepting + * two less 255 steps. See Documentation/lzo.txt for more information. + */ +#define MAX_255_COUNT ((((size_t)~0) / 255) - 2) int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len) @@ -75,17 +65,24 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (t < 16) { if (likely(state == 0)) { if (unlikely(t == 0)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; - NEED_IP(1, 0); + NEED_IP(1); } - t += 15 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 15 + *ip++; } t += 3; copy_literal_run: #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - if (likely(HAVE_IP(t, 15) && HAVE_OP(t, 15))) { + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { const unsigned char *ie = ip + t; unsigned char *oe = op + t; do { @@ -101,8 +98,8 @@ copy_literal_run: } else #endif { - NEED_OP(t, 0); - NEED_IP(t, 3); + NEED_OP(t); + NEED_IP(t + 3); do { *op++ = *ip++; } while (--t > 0); @@ -115,7 +112,7 @@ copy_literal_run: m_pos -= t >> 2; m_pos -= *ip++ << 2; TEST_LB(m_pos); - NEED_OP(2, 0); + NEED_OP(2); op[0] = m_pos[0]; op[1] = m_pos[1]; op += 2; @@ -136,13 +133,20 @@ copy_literal_run: } else if (t >= 32) { t = (t & 31) + (3 - 1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; - NEED_IP(1, 0); + NEED_IP(1); } - t += 31 + *ip++; - NEED_IP(2, 0); + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 31 + *ip++; + NEED_IP(2); } m_pos = op - 1; next = get_unaligned_le16(ip); @@ -154,13 +158,20 @@ copy_literal_run: m_pos -= (t & 8) << 11; t = (t & 7) + (3 - 1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; - NEED_IP(1, 0); + NEED_IP(1); } - t += 7 + *ip++; - NEED_IP(2, 0); + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; + NEED_IP(2); } next = get_unaligned_le16(ip); ip += 2; @@ -174,7 +185,7 @@ copy_literal_run: #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) if (op - m_pos >= 8) { unsigned char *oe = op + t; - if (likely(HAVE_OP(t, 15))) { + if (likely(HAVE_OP(t + 15))) { do { COPY8(op, m_pos); op += 8; @@ -184,7 +195,7 @@ copy_literal_run: m_pos += 8; } while (op < oe); op = oe; - if (HAVE_IP(6, 0)) { + if (HAVE_IP(6)) { state = next; COPY4(op, ip); op += next; @@ -192,7 +203,7 @@ copy_literal_run: continue; } } else { - NEED_OP(t, 0); + NEED_OP(t); do { *op++ = *m_pos++; } while (op < oe); @@ -201,7 +212,7 @@ copy_literal_run: #endif { unsigned char *oe = op + t; - NEED_OP(t, 0); + NEED_OP(t); op[0] = m_pos[0]; op[1] = m_pos[1]; op += 2; @@ -214,15 +225,15 @@ match_next: state = next; t = next; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - if (likely(HAVE_IP(6, 0) && HAVE_OP(4, 0))) { + if (likely(HAVE_IP(6) && HAVE_OP(4))) { COPY4(op, ip); op += t; ip += t; } else #endif { - NEED_IP(t, 3); - NEED_OP(t, 0); + NEED_IP(t + 3); + NEED_OP(t); while (t > 0) { *op++ = *ip++; t--; diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 6e45a5074bf0..52abeeb3cb9d 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -93,17 +93,12 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) * to be released by the balloon driver. */ if (trylock_page(page)) { + if (!PagePrivate(page)) { + /* raced with isolation */ + unlock_page(page); + continue; + } spin_lock_irqsave(&b_dev_info->pages_lock, flags); - /* - * Raise the page refcount here to prevent any wrong - * attempt to isolate this page, in case of coliding - * with balloon_page_isolate() just after we release - * the page lock. - * - * balloon_page_free() will take care of dropping - * this extra refcount later. - */ - get_page(page); balloon_page_delete(page); spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); @@ -187,7 +182,9 @@ static inline void __isolate_balloon_page(struct page *page) { struct balloon_dev_info *b_dev_info = page->mapping->private_data; unsigned long flags; + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + ClearPagePrivate(page); list_del(&page->lru); b_dev_info->isolated_pages++; spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); @@ -197,7 +194,9 @@ static inline void __putback_balloon_page(struct page *page) { struct balloon_dev_info *b_dev_info = page->mapping->private_data; unsigned long flags; + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + SetPagePrivate(page); list_add(&page->lru, &b_dev_info->pages); b_dev_info->isolated_pages--; spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); @@ -235,12 +234,11 @@ bool balloon_page_isolate(struct page *page) */ if (likely(trylock_page(page))) { /* - * A ballooned page, by default, has just one refcount. + * A ballooned page, by default, has PagePrivate set. * Prevent concurrent compaction threads from isolating - * an already isolated balloon page by refcount check. + * an already isolated balloon page by clearing it. */ - if (__is_movable_balloon_page(page) && - page_count(page) == 2) { + if (balloon_page_movable(page)) { __isolate_balloon_page(page); unlock_page(page); return true; diff --git a/mm/cma.c b/mm/cma.c index c17751c0dcaf..0ab564623ea8 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -57,7 +57,9 @@ unsigned long cma_get_size(struct cma *cma) static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) { - return (1UL << (align_order >> cma->order_per_bit)) - 1; + if (align_order <= cma->order_per_bit) + return 0; + return (1UL << (align_order - cma->order_per_bit)) - 1; } static unsigned long cma_bitmap_maxno(struct cma *cma) diff --git a/mm/compaction.c b/mm/compaction.c index 21bf292b642a..0653f5f73bfa 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -597,7 +597,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ if (!PageLRU(page)) { if (unlikely(balloon_page_movable(page))) { - if (locked && balloon_page_isolate(page)) { + if (balloon_page_isolate(page)) { /* Successfully isolated */ goto isolate_success; } diff --git a/mm/migrate.c b/mm/migrate.c index 2740360cd216..01439953abf5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -876,7 +876,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, } } - if (unlikely(balloon_page_movable(page))) { + if (unlikely(isolated_balloon_page(page))) { /* * A ballooned page does not need any special attention from * physical to virtual reverse mapping procedures. @@ -955,17 +955,6 @@ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, rc = __unmap_and_move(page, newpage, force, mode); - if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) { - /* - * A ballooned page has been migrated already. - * Now, it's the time to wrap-up counters, - * handle the page back to Buddy and return. - */ - dec_zone_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); - balloon_page_free(page); - return MIGRATEPAGE_SUCCESS; - } out: if (rc != -EAGAIN) { /* @@ -988,6 +977,9 @@ out: if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { ClearPageSwapBacked(newpage); put_new_page(newpage, private); + } else if (unlikely(__is_movable_balloon_page(newpage))) { + /* drop our reference, page already in the balloon */ + put_page(newpage); } else putback_lru_page(newpage); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 206b65ccd5b8..075f20d050d6 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -39,6 +39,7 @@ static struct dentry *lowpan_control_debugfs; struct skb_cb { struct in6_addr addr; + struct in6_addr gw; struct l2cap_chan *chan; int status; }; @@ -158,6 +159,54 @@ static inline struct lowpan_peer *peer_lookup_conn(struct lowpan_dev *dev, return NULL; } +static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, + struct in6_addr *daddr, + struct sk_buff *skb) +{ + struct lowpan_peer *peer, *tmp; + struct in6_addr *nexthop; + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int count = atomic_read(&dev->peer_count); + + BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt); + + /* If we have multiple 6lowpan peers, then check where we should + * send the packet. If only one peer exists, then we can send the + * packet right away. + */ + if (count == 1) + return list_first_entry(&dev->peers, struct lowpan_peer, + list); + + if (!rt) { + nexthop = &lowpan_cb(skb)->gw; + + if (ipv6_addr_any(nexthop)) + return NULL; + } else { + nexthop = rt6_nexthop(rt); + + /* We need to remember the address because it is needed + * by bt_xmit() when sending the packet. In bt_xmit(), the + * destination routing info is not set. + */ + memcpy(&lowpan_cb(skb)->gw, nexthop, sizeof(struct in6_addr)); + } + + BT_DBG("gw %pI6c", nexthop); + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + BT_DBG("dst addr %pMR dst type %d ip %pI6c", + &peer->chan->dst, peer->chan->dst_type, + &peer->peer_addr); + + if (!ipv6_addr_cmp(&peer->peer_addr, nexthop)) + return peer; + } + + return NULL; +} + static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) { struct lowpan_dev *entry, *tmp; @@ -415,8 +464,18 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, read_unlock_irqrestore(&devices_lock, flags); if (!peer) { - BT_DBG("no such peer %pMR found", &addr); - return -ENOENT; + /* The packet might be sent to 6lowpan interface + * because of routing (either via default route + * or user set route) so get peer according to + * the destination address. + */ + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_dst(dev, &hdr->daddr, skb); + read_unlock_irqrestore(&devices_lock, flags); + if (!peer) { + BT_DBG("no such peer %pMR found", &addr); + return -ENOENT; + } } daddr = peer->eui64_addr; @@ -520,6 +579,8 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) read_lock_irqsave(&devices_lock, flags); peer = peer_lookup_ba(dev, &addr, addr_type); + if (!peer) + peer = peer_lookup_dst(dev, &lowpan_cb(skb)->addr, skb); read_unlock_irqrestore(&devices_lock, flags); BT_DBG("xmit %s to %pMR type %d IP %pI6c peer %p", @@ -671,6 +732,14 @@ static struct l2cap_chan *chan_open(struct l2cap_chan *pchan) return chan; } +static void set_ip_addr_bits(u8 addr_type, u8 *addr) +{ + if (addr_type == BDADDR_LE_PUBLIC) + *addr |= 0x02; + else + *addr &= ~0x02; +} + static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, struct lowpan_dev *dev) { @@ -693,6 +762,11 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, EUI64_ADDR_LEN); + /* IPv6 address needs to have the U/L bit set properly so toggle + * it back here. + */ + set_ip_addr_bits(chan->dst_type, (u8 *)&peer->peer_addr.s6_addr + 8); + write_lock_irqsave(&devices_lock, flags); INIT_LIST_HEAD(&peer->list); peer_add(dev, peer); @@ -890,7 +964,7 @@ static void chan_resume_cb(struct l2cap_chan *chan) static long chan_get_sndtimeo_cb(struct l2cap_chan *chan) { - return msecs_to_jiffies(1000); + return L2CAP_CONN_TIMEOUT; } static const struct l2cap_ops bt_6lowpan_chan_ops = { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 46547b920f88..14ca8ae7cfbe 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2418,12 +2418,8 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan, BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); - pdu_len = chan->conn->mtu - L2CAP_HDR_SIZE; - - pdu_len = min_t(size_t, pdu_len, chan->remote_mps); - sdu_len = len; - pdu_len -= L2CAP_SDULEN_SIZE; + pdu_len = chan->remote_mps - L2CAP_SDULEN_SIZE; while (len > 0) { if (len <= pdu_len) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index fd3294300803..7f0509e1d3bb 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -442,8 +442,11 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, } /* Not Just Works/Confirm results in MITM Authentication */ - if (method != JUST_CFM) + if (method != JUST_CFM) { set_bit(SMP_FLAG_MITM_AUTH, &smp->flags); + if (hcon->pending_sec_level < BT_SECURITY_HIGH) + hcon->pending_sec_level = BT_SECURITY_HIGH; + } /* If both devices have Keyoard-Display I/O, the master * Confirms and the slave Enters the passkey. diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 57da4bd7ba0c..0fb456c20eda 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -177,7 +177,7 @@ void ima_delete_rules(void); int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len); + int xattr_len, int opened); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, @@ -193,7 +193,7 @@ static inline int ima_appraise_measurement(int func, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { return INTEGRITY_UNKNOWN; } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 86bfd5c5df85..225fd944a4ef 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -183,7 +183,7 @@ int ima_read_xattr(struct dentry *dentry, int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { static const char op[] = "appraise_data"; char *cause = "unknown"; @@ -202,8 +202,11 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, goto out; cause = "missing-hash"; - status = - (inode->i_size == 0) ? INTEGRITY_PASS : INTEGRITY_NOLABEL; + status = INTEGRITY_NOLABEL; + if (opened & FILE_CREATED) { + iint->flags |= IMA_NEW_FILE; + status = INTEGRITY_PASS; + } goto out; } diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 0bd732843fe7..f7aac3cf19ae 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -80,19 +80,19 @@ static int ima_kernel_read(struct file *file, loff_t offset, { mm_segment_t old_fs; char __user *buf = addr; - ssize_t ret; + ssize_t ret = -EINVAL; if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->read && !file->f_op->aio_read) - return -EINVAL; old_fs = get_fs(); set_fs(get_ds()); if (file->f_op->read) ret = file->f_op->read(file, buf, count, &offset); - else + else if (file->f_op->aio_read) ret = do_sync_read(file, buf, count, &offset); + else if (file->f_op->read_iter) + ret = new_sync_read(file, buf, count, &offset); set_fs(old_fs); return ret; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 2917f980bf30..f82cf9b8e92b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -124,11 +124,13 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, return; mutex_lock(&inode->i_mutex); - if (atomic_read(&inode->i_writecount) == 1 && - iint->version != inode->i_version) { - iint->flags &= ~IMA_DONE_MASK; - if (iint->flags & IMA_APPRAISE) - ima_update_xattr(iint, file); + if (atomic_read(&inode->i_writecount) == 1) { + if ((iint->version != inode->i_version) || + (iint->flags & IMA_NEW_FILE)) { + iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); + if (iint->flags & IMA_APPRAISE) + ima_update_xattr(iint, file); + } } mutex_unlock(&inode->i_mutex); } @@ -155,7 +157,7 @@ void ima_file_free(struct file *file) } static int process_measurement(struct file *file, const char *filename, - int mask, int function) + int mask, int function, int opened) { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; @@ -224,7 +226,7 @@ static int process_measurement(struct file *file, const char *filename, xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) rc = ima_appraise_measurement(_func, iint, file, pathname, - xattr_value, xattr_len); + xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -253,7 +255,7 @@ out: int ima_file_mmap(struct file *file, unsigned long prot) { if (file && (prot & PROT_EXEC)) - return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK, 0); return 0; } @@ -275,7 +277,7 @@ int ima_bprm_check(struct linux_binprm *bprm) return process_measurement(bprm->file, (strcmp(bprm->filename, bprm->interp) == 0) ? bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + MAY_EXEC, BPRM_CHECK, 0); } /** @@ -288,12 +290,12 @@ int ima_bprm_check(struct linux_binprm *bprm) * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ -int ima_file_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask, int opened) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + FILE_CHECK, opened); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -316,7 +318,7 @@ int ima_module_check(struct file *file) #endif return 0; /* We rely on module signature checking */ } - return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK, 0); } int ima_fw_from_file(struct file *file, char *buf, size_t size) @@ -327,7 +329,7 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size) return -EACCES; /* INTEGRITY_UNKNOWN */ return 0; } - return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK, 0); } static int __init init_ima(void) diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 19b8e314ca96..904e68abd49e 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -31,6 +31,7 @@ #define IMA_DIGSIG 0x01000000 #define IMA_DIGSIG_REQUIRED 0x02000000 #define IMA_PERMIT_DIRECTIO 0x04000000 +#define IMA_NEW_FILE 0x08000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 8cd2f930ad0b..a95356f45606 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3193,7 +3193,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { #ifndef ARCH_HAS_DMA_MMAP_COHERENT /* This should be defined / handled globally! */ -#ifdef CONFIG_ARM +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #define ARCH_HAS_DMA_MMAP_COHERENT #endif #endif diff --git a/sound/firewire/bebob/bebob_terratec.c b/sound/firewire/bebob/bebob_terratec.c index eef8ea7d9b97..0e4c0bfc463b 100644 --- a/sound/firewire/bebob/bebob_terratec.c +++ b/sound/firewire/bebob/bebob_terratec.c @@ -17,10 +17,10 @@ phase88_rack_clk_src_get(struct snd_bebob *bebob, unsigned int *id) unsigned int enable_ext, enable_word; int err; - err = avc_audio_get_selector(bebob->unit, 0, 0, &enable_ext); + err = avc_audio_get_selector(bebob->unit, 0, 9, &enable_ext); if (err < 0) goto end; - err = avc_audio_get_selector(bebob->unit, 0, 0, &enable_word); + err = avc_audio_get_selector(bebob->unit, 0, 8, &enable_word); if (err < 0) goto end; diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index 3f3ef38d9b6e..874cd76c7b7f 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -85,6 +85,8 @@ snd_emu10k1_ops_setup(struct snd_emux *emux) * get more voice for pcm * * terminate most inactive voice and give it as a pcm voice. + * + * voice_lock is already held. */ int snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) @@ -92,12 +94,10 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) struct snd_emux *emu; struct snd_emux_voice *vp; struct best_voice best[V_END]; - unsigned long flags; int i; emu = hw->synth; - spin_lock_irqsave(&emu->voice_lock, flags); lookup_voices(emu, hw, best, 1); /* no OFF voices */ for (i = 0; i < V_END; i++) { if (best[i].voice >= 0) { @@ -113,11 +113,9 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) vp->emu->num_voices--; vp->ch = -1; vp->state = SNDRV_EMUX_ST_OFF; - spin_unlock_irqrestore(&emu->voice_lock, flags); return ch; } } - spin_unlock_irqrestore(&emu->voice_lock, flags); /* not found */ return -ENOMEM; diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 364bb413e02a..bb989ab316e8 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -425,7 +425,7 @@ struct snd_hda_pin_quirk { .subvendor = _subvendor,\ .name = _name,\ .value = _value,\ - .pins = (const struct hda_pintbl[]) { _pins } \ + .pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \ } #else @@ -433,7 +433,7 @@ struct snd_hda_pin_quirk { { .codec = _codec,\ .subvendor = _subvendor,\ .value = _value,\ - .pins = (const struct hda_pintbl[]) { _pins } \ + .pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \ } #endif diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 99d7d7fecaad..c3658df2359c 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1577,19 +1577,22 @@ static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) } } - if (pin_eld->eld_valid && !eld->eld_valid) { - update_eld = true; + if (pin_eld->eld_valid != eld->eld_valid) eld_changed = true; - } + + if (pin_eld->eld_valid && !eld->eld_valid) + update_eld = true; + if (update_eld) { bool old_eld_valid = pin_eld->eld_valid; pin_eld->eld_valid = eld->eld_valid; - eld_changed = pin_eld->eld_size != eld->eld_size || + if (pin_eld->eld_size != eld->eld_size || memcmp(pin_eld->eld_buffer, eld->eld_buffer, - eld->eld_size) != 0; - if (eld_changed) + eld->eld_size) != 0) { memcpy(pin_eld->eld_buffer, eld->eld_buffer, eld->eld_size); + eld_changed = true; + } pin_eld->eld_size = eld->eld_size; pin_eld->info = eld->info; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1ba22fb527c2..b7b293cc710e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3125,6 +3125,9 @@ static void alc283_shutup(struct hda_codec *codec) alc_write_coef_idx(codec, 0x43, 0x9004); + /*depop hp during suspend*/ + alc_write_coef_idx(codec, 0x06, 0x2100); + snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); @@ -5783,9 +5786,9 @@ static void alc662_led_gpio1_mute_hook(void *private_data, int enabled) unsigned int oldval = spec->gpio_led; if (enabled) - spec->gpio_led &= ~0x01; - else spec->gpio_led |= 0x01; + else + spec->gpio_led &= ~0x01; if (spec->gpio_led != oldval) snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, spec->gpio_led); diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 223c47b33ba3..c657752a420c 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -385,6 +385,36 @@ YAMAHA_DEVICE(0x105d, NULL), } }, { + USB_DEVICE(0x0499, 0x1509), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + /* .vendor_name = "Yamaha", */ + /* .product_name = "Steinberg UR22", */ + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_YAMAHA + }, + { + .ifnum = 4, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, +{ USB_DEVICE(0x0499, 0x150a), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { /* .vendor_name = "Yamaha", */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 95519bc959ed..6a3f29bd43d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,6 +52,7 @@ #include #include +#include #include #include @@ -95,8 +96,6 @@ static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, u64 last_generation); static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, @@ -476,6 +475,13 @@ static struct kvm *kvm_create_vm(unsigned long type) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err_no_srcu; + + /* + * Init kvm generation close to the maximum to easily test the + * code of handling generation number wrap-around. + */ + kvm->memslots->generation = -150; + kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; @@ -687,8 +693,7 @@ static void sort_memslots(struct kvm_memslots *slots) } static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, - u64 last_generation) + struct kvm_memory_slot *new) { if (new) { int id = new->id; @@ -699,8 +704,6 @@ static void update_memslots(struct kvm_memslots *slots, if (new->npages != npages) sort_memslots(slots); } - - slots->generation = last_generation + 1; } static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) @@ -722,10 +725,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, { struct kvm_memslots *old_memslots = kvm->memslots; - update_memslots(slots, new, kvm->memslots->generation); + /* + * Set the low bit in the generation, which disables SPTE caching + * until the end of synchronize_srcu_expedited. + */ + WARN_ON(old_memslots->generation & 1); + slots->generation = old_memslots->generation + 1; + + update_memslots(slots, new); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); + /* + * Increment the new memslot generation a second time. This prevents + * vm exits that race with memslot updates from caching a memslot + * generation that will (potentially) be valid forever. + */ + slots->generation++; + kvm_arch_memslots_updated(kvm); return old_memslots; @@ -1975,6 +1992,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm) return -EIO; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) + return -EINVAL; + #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* * Special cases: vcpu ioctls that are asynchronous to vcpu execution,