diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f304062..1e7e0bb4c14ec 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst index d095867899c59..594e697aa1b2f 100644 --- a/Documentation/admin-guide/abi-obsolete.rst +++ b/Documentation/admin-guide/abi-obsolete.rst @@ -7,5 +7,5 @@ marked to be removed at some later point in time. The description of the interface will document the reason why it is obsolete and when it can be expected to be removed. -.. kernel-abi:: $srctree/Documentation/ABI/obsolete +.. kernel-abi:: ABI/obsolete :rst: diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst index f7e9e43023c13..f9e000c81828e 100644 --- a/Documentation/admin-guide/abi-removed.rst +++ b/Documentation/admin-guide/abi-removed.rst @@ -1,5 +1,5 @@ ABI removed symbols =================== -.. kernel-abi:: $srctree/Documentation/ABI/removed +.. kernel-abi:: ABI/removed :rst: diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst index 70490736e0d30..fc3361d847b12 100644 --- a/Documentation/admin-guide/abi-stable.rst +++ b/Documentation/admin-guide/abi-stable.rst @@ -10,5 +10,5 @@ for at least 2 years. Most interfaces (like syscalls) are expected to never change and always be available. -.. kernel-abi:: $srctree/Documentation/ABI/stable +.. kernel-abi:: ABI/stable :rst: diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst index b205b16a72d08..19767926b3440 100644 --- a/Documentation/admin-guide/abi-testing.rst +++ b/Documentation/admin-guide/abi-testing.rst @@ -16,5 +16,5 @@ Programs that use these interfaces are strongly encouraged to add their name to the description of these interfaces, so that the kernel developers can easily notify them if any changes occur. -.. kernel-abi:: $srctree/Documentation/ABI/testing +.. kernel-abi:: ABI/testing :rst: diff --git a/Documentation/admin-guide/features.rst b/Documentation/admin-guide/features.rst index 8c167082a84f9..7651eca38227d 100644 --- a/Documentation/admin-guide/features.rst +++ b/Documentation/admin-guide/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features +.. kernel-feat:: features diff --git a/Documentation/arch/arc/features.rst b/Documentation/arch/arc/features.rst index b793583d688a4..49ff446ff744c 100644 --- a/Documentation/arch/arc/features.rst +++ b/Documentation/arch/arc/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features arc +.. kernel-feat:: features arc diff --git a/Documentation/arch/arm/features.rst b/Documentation/arch/arm/features.rst index 7414ec03dd157..0e76aaf68ecab 100644 --- a/Documentation/arch/arm/features.rst +++ b/Documentation/arch/arm/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features arm +.. kernel-feat:: features arm diff --git a/Documentation/arch/arm64/features.rst b/Documentation/arch/arm64/features.rst index dfa4cb3cd3efa..03321f4309d0b 100644 --- a/Documentation/arch/arm64/features.rst +++ b/Documentation/arch/arm64/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features arm64 +.. kernel-feat:: features arm64 diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index f47f63bcf67c9..7acd64c61f50c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -71,6 +71,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | diff --git a/Documentation/arch/loongarch/features.rst b/Documentation/arch/loongarch/features.rst index ebacade3ea454..009f44c7951f8 100644 --- a/Documentation/arch/loongarch/features.rst +++ b/Documentation/arch/loongarch/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features loongarch +.. kernel-feat:: features loongarch diff --git a/Documentation/arch/m68k/features.rst b/Documentation/arch/m68k/features.rst index 5107a21194724..de7f0ccf7fc8e 100644 --- a/Documentation/arch/m68k/features.rst +++ b/Documentation/arch/m68k/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features m68k +.. kernel-feat:: features m68k diff --git a/Documentation/arch/mips/features.rst b/Documentation/arch/mips/features.rst index 1973d729b29a9..6e0ffe3e73540 100644 --- a/Documentation/arch/mips/features.rst +++ b/Documentation/arch/mips/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features mips +.. kernel-feat:: features mips diff --git a/Documentation/arch/nios2/features.rst b/Documentation/arch/nios2/features.rst index 8449e63f69b2b..89913810ccb5a 100644 --- a/Documentation/arch/nios2/features.rst +++ b/Documentation/arch/nios2/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features nios2 +.. kernel-feat:: features nios2 diff --git a/Documentation/arch/openrisc/features.rst b/Documentation/arch/openrisc/features.rst index 3f7c40d219f2c..bae2e25adfd64 100644 --- a/Documentation/arch/openrisc/features.rst +++ b/Documentation/arch/openrisc/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features openrisc +.. kernel-feat:: features openrisc diff --git a/Documentation/arch/parisc/features.rst b/Documentation/arch/parisc/features.rst index 501d7c4500379..b3aa4d243b936 100644 --- a/Documentation/arch/parisc/features.rst +++ b/Documentation/arch/parisc/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features parisc +.. kernel-feat:: features parisc diff --git a/Documentation/arch/s390/features.rst b/Documentation/arch/s390/features.rst index 57c296a9d8f30..2883dc9506817 100644 --- a/Documentation/arch/s390/features.rst +++ b/Documentation/arch/s390/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features s390 +.. kernel-feat:: features s390 diff --git a/Documentation/arch/sh/features.rst b/Documentation/arch/sh/features.rst index f722af3b6c993..fae48fe81e9bd 100644 --- a/Documentation/arch/sh/features.rst +++ b/Documentation/arch/sh/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features sh +.. kernel-feat:: features sh diff --git a/Documentation/arch/sparc/features.rst b/Documentation/arch/sparc/features.rst index c0c92468b0fe9..96835b6d598a1 100644 --- a/Documentation/arch/sparc/features.rst +++ b/Documentation/arch/sparc/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features sparc +.. kernel-feat:: features sparc diff --git a/Documentation/arch/x86/features.rst b/Documentation/arch/x86/features.rst index b663f15053ce8..a33616346a388 100644 --- a/Documentation/arch/x86/features.rst +++ b/Documentation/arch/x86/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features x86 +.. kernel-feat:: features x86 diff --git a/Documentation/arch/xtensa/features.rst b/Documentation/arch/xtensa/features.rst index 6b92c7bfa19da..28dcce1759be4 100644 --- a/Documentation/arch/xtensa/features.rst +++ b/Documentation/arch/xtensa/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features xtensa +.. kernel-feat:: features xtensa diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index ddf9522a5dc23..5c2769dc689af 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -394,6 +394,11 @@ properties: When a PFC frame is received with priorities matching the bitmask, the queue is blocked from transmitting for the pause time specified in the PFC frame. + + snps,coe-unsupported: + type: boolean + description: TX checksum offload is unsupported by the TX queue. + allOf: - if: required: diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst index dccd61c7c5c3b..193c22687851a 100644 --- a/Documentation/filesystems/directory-locking.rst +++ b/Documentation/filesystems/directory-locking.rst @@ -22,13 +22,16 @@ exclusive. 3) object removal. Locking rules: caller locks parent, finds victim, locks victim and calls the method. Locks are exclusive. -4) rename() that is _not_ cross-directory. Locking rules: caller locks the -parent and finds source and target. We lock both (provided they exist). If we -need to lock two inodes of different type (dir vs non-dir), we lock directory -first. If we need to lock two inodes of the same type, lock them in inode -pointer order. Then call the method. All locks are exclusive. -NB: we might get away with locking the source (and target in exchange -case) shared. +4) rename() that is _not_ cross-directory. Locking rules: caller locks +the parent and finds source and target. Then we decide which of the +source and target need to be locked. Source needs to be locked if it's a +non-directory; target - if it's a non-directory or about to be removed. +Take the locks that need to be taken, in inode pointer order if need +to take both (that can happen only when both source and target are +non-directories - the source because it wouldn't be locked otherwise +and the target because mixing directory and non-directory is allowed +only with RENAME_EXCHANGE, and that won't be removing the target). +After the locks had been taken, call the method. All locks are exclusive. 5) link creation. Locking rules: @@ -44,20 +47,17 @@ rules: * lock the filesystem * lock parents in "ancestors first" order. If one is not ancestor of - the other, lock them in inode pointer order. + the other, lock the parent of source first. * find source and target. * if old parent is equal to or is a descendent of target fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * Lock both the source and the target provided they exist. If we - need to lock two inodes of different type (dir vs non-dir), we lock - the directory first. If we need to lock two inodes of the same type, - lock them in inode pointer order. + * Lock subdirectories involved (source before target). + * Lock non-directories involved, in inode pointer order. * call the method. -All ->i_rwsem are taken exclusive. Again, we might get away with locking -the source (and target in exchange case) shared. +All ->i_rwsem are taken exclusive. The rules above obviously guarantee that all directories that are going to be read, modified or removed by method will be locked by caller. @@ -67,6 +67,7 @@ If no directory is its own ancestor, the scheme above is deadlock-free. Proof: +[XXX: will be updated once we are done massaging the lock_rename()] First of all, at any moment we have a linear ordering of the objects - A < B iff (A is an ancestor of B) or (B is not an ancestor of A and ptr(A) < ptr(B)). diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 7be2900806c85..bd12f2f850ad3 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -101,7 +101,7 @@ symlink: exclusive mkdir: exclusive unlink: exclusive (both) rmdir: exclusive (both)(see below) -rename: exclusive (all) (see below) +rename: exclusive (both parents, some children) (see below) readlink: no get_link: no setattr: exclusive @@ -123,6 +123,9 @@ get_offset_ctx no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem exclusive on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. + ->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories + involved. + ->rename() has ->i_rwsem exclusive on any subdirectory that changes parent. See Documentation/filesystems/directory-locking.rst for more detailed discussion of the locking scheme for directory operations. diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 4d05b9862451e..41d964b48e657 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1045,3 +1045,21 @@ filesystem type is now moved to a later point when the devices are closed: As this is a VFS level change it has no practical consequences for filesystems other than that all of them must use one of the provided kill_litter_super(), kill_anon_super(), or kill_block_super() helpers. + +--- + +**mandatory** + +If ->rename() update of .. on cross-directory move needs an exclusion with +directory modifications, do *not* lock the subdirectory in question in your +->rename() - it's done by the caller now [that item should've been added in +28eceeda130f "fs: Lock moved directories"]. + +--- + +**mandatory** + +On same-directory ->rename() the (tautological) update of .. is not protected +by any locks; just don't do it if the old parent is the same as the new one. +We really can't lock two subdirectories in same-directory rename - not without +deadlocks. diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index a0c83fc481264..690d2ffe720ef 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -546,6 +546,8 @@ Plane Composition Properties .. kernel-doc:: drivers/gpu/drm/drm_blend.c :doc: overview +.. _damage_tracking_properties: + Damage Tracking Properties -------------------------- diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 03fe5d1247be2..85bbe05436098 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -337,8 +337,8 @@ connector register/unregister fixes Level: Intermediate -Remove load/unload callbacks from all non-DRIVER_LEGACY drivers ---------------------------------------------------------------- +Remove load/unload callbacks +---------------------------- The load/unload callbacks in struct &drm_driver are very much midlayers, plus for historical reasons they get the ordering wrong (and we can't fix that) @@ -347,8 +347,7 @@ between setting up the &drm_driver structure and calling drm_dev_register(). - Rework drivers to no longer use the load/unload callbacks, directly coding the load/unload sequence into the driver's probe function. -- Once all non-DRIVER_LEGACY drivers are converted, disallow the load/unload - callbacks for all modern drivers. +- Once all drivers are converted, remove the load/unload callbacks. Contact: Daniel Vetter diff --git a/Documentation/powerpc/features.rst b/Documentation/powerpc/features.rst index aeae73df86b0c..ee4b95e04202d 100644 --- a/Documentation/powerpc/features.rst +++ b/Documentation/powerpc/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features powerpc +.. kernel-feat:: features powerpc diff --git a/Documentation/riscv/features.rst b/Documentation/riscv/features.rst index c70ef6ac2368c..36e90144adabd 100644 --- a/Documentation/riscv/features.rst +++ b/Documentation/riscv/features.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: GPL-2.0 -.. kernel-feat:: $srctree/Documentation/features riscv +.. kernel-feat:: features riscv diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index b5feb5b1d9054..6d8a637ad5664 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -39,8 +39,6 @@ import sys import re import kernellog -from os import path - from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive @@ -73,60 +71,26 @@ class KernelCmd(Directive): } def run(self): - doc = self.state.document if not doc.settings.file_insertion_enabled: raise self.warning("docutils: file insertion disabled") - env = doc.settings.env - cwd = path.dirname(doc.current_source) - cmd = "get_abi.pl rest --enable-lineno --dir " - cmd += self.arguments[0] - - if 'rst' in self.options: - cmd += " --rst-source" + srctree = os.path.abspath(os.environ["srctree"]) - srctree = path.abspath(os.environ["srctree"]) + args = [ + os.path.join(srctree, 'scripts/get_abi.pl'), + 'rest', + '--enable-lineno', + '--dir', os.path.join(srctree, 'Documentation', self.arguments[0]), + ] - fname = cmd - - # extend PATH with $(srctree)/scripts - path_env = os.pathsep.join([ - srctree + os.sep + "scripts", - os.environ["PATH"] - ]) - shell_env = os.environ.copy() - shell_env["PATH"] = path_env - shell_env["srctree"] = srctree + if 'rst' in self.options: + args.append('--rst-source') - lines = self.runCmd(cmd, shell=True, cwd=cwd, env=shell_env) + lines = subprocess.check_output(args, cwd=os.path.dirname(doc.current_source)).decode('utf-8') nodeList = self.nestedParse(lines, self.arguments[0]) return nodeList - def runCmd(self, cmd, **kwargs): - u"""Run command ``cmd`` and return its stdout as unicode.""" - - try: - proc = subprocess.Popen( - cmd - , stdout = subprocess.PIPE - , stderr = subprocess.PIPE - , **kwargs - ) - out, err = proc.communicate() - - out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8') - - if proc.returncode != 0: - raise self.severe( - u"command '%s' failed with return code %d" - % (cmd, proc.returncode) - ) - except OSError as exc: - raise self.severe(u"problems with '%s' directive: %s." - % (self.name, ErrorString(exc))) - return out - def nestedParse(self, lines, fname): env = self.state.document.settings.env content = ViewList() diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index 27b701ed3681e..bdfaa3e4b202c 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -37,8 +37,6 @@ import re import subprocess import sys -from os import path - from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive @@ -76,33 +74,26 @@ class KernelFeat(Directive): self.state.document.settings.env.app.warn(message, prefix="") def run(self): - doc = self.state.document if not doc.settings.file_insertion_enabled: raise self.warning("docutils: file insertion disabled") env = doc.settings.env - cwd = path.dirname(doc.current_source) - cmd = "get_feat.pl rest --enable-fname --dir " - cmd += self.arguments[0] - - if len(self.arguments) > 1: - cmd += " --arch " + self.arguments[1] - srctree = path.abspath(os.environ["srctree"]) + srctree = os.path.abspath(os.environ["srctree"]) - fname = cmd + args = [ + os.path.join(srctree, 'scripts/get_feat.pl'), + 'rest', + '--enable-fname', + '--dir', + os.path.join(srctree, 'Documentation', self.arguments[0]), + ] - # extend PATH with $(srctree)/scripts - path_env = os.pathsep.join([ - srctree + os.sep + "scripts", - os.environ["PATH"] - ]) - shell_env = os.environ.copy() - shell_env["PATH"] = path_env - shell_env["srctree"] = srctree + if len(self.arguments) > 1: + args.extend(['--arch', self.arguments[1]]) - lines = self.runCmd(cmd, shell=True, cwd=cwd, env=shell_env) + lines = subprocess.check_output(args, cwd=os.path.dirname(doc.current_source)).decode('utf-8') line_regex = re.compile("^\.\. FILE (\S+)$") @@ -121,30 +112,6 @@ class KernelFeat(Directive): nodeList = self.nestedParse(out_lines, fname) return nodeList - def runCmd(self, cmd, **kwargs): - u"""Run command ``cmd`` and return its stdout as unicode.""" - - try: - proc = subprocess.Popen( - cmd - , stdout = subprocess.PIPE - , stderr = subprocess.PIPE - , **kwargs - ) - out, err = proc.communicate() - - out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8') - - if proc.returncode != 0: - raise self.severe( - u"command '%s' failed with return code %d" - % (cmd, proc.returncode) - ) - except OSError as exc: - raise self.severe(u"problems with '%s' directive: %s." - % (self.name, ErrorString(exc))) - return out - def nestedParse(self, lines, fname): content = ViewList() node = nodes.section() diff --git a/Documentation/translations/zh_CN/arch/loongarch/features.rst b/Documentation/translations/zh_CN/arch/loongarch/features.rst index 82bfac180bdc0..cec38dda8298c 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/features.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/features.rst @@ -5,4 +5,4 @@ :Original: Documentation/arch/loongarch/features.rst :Translator: Huacai Chen -.. kernel-feat:: $srctree/Documentation/features loongarch +.. kernel-feat:: features loongarch diff --git a/Documentation/translations/zh_CN/arch/mips/features.rst b/Documentation/translations/zh_CN/arch/mips/features.rst index da1b956e4a40f..0d6df97db069b 100644 --- a/Documentation/translations/zh_CN/arch/mips/features.rst +++ b/Documentation/translations/zh_CN/arch/mips/features.rst @@ -10,4 +10,4 @@ .. _cn_features: -.. kernel-feat:: $srctree/Documentation/features mips +.. kernel-feat:: features mips diff --git a/Documentation/translations/zh_TW/dev-tools/index.rst b/Documentation/translations/zh_TW/dev-tools/index.rst new file mode 100644 index 0000000000000..8f101db5a07ff --- /dev/null +++ b/Documentation/translations/zh_TW/dev-tools/index.rst @@ -0,0 +1,40 @@ +.. include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/dev-tools/index.rst +:Translator: Min-Hua Chen + +============ +內核開發工具 +============ + +本文檔是有關內核開發工具文檔的合集。 +目前這些文檔已經整理在一起,不需要再花費額外的精力。 +歡迎任何補丁。 + +有關測試專用工具的簡要概述,參見 +Documentation/dev-tools/testing-overview.rst + +.. class:: toc-title + + 目錄 + +.. toctree:: + :maxdepth: 2 + + sparse + +Todolist: + + - coccinelle + - kcov + - ubsan + - kmemleak + - kcsan + - kfence + - kgdb + - kselftest + - kunit/index + - testing-overview + - gcov + - kasan + - gdb-kernel-debugging diff --git a/Documentation/translations/zh_TW/dev-tools/sparse.txt b/Documentation/translations/zh_TW/dev-tools/sparse.txt new file mode 100644 index 0000000000000..35d3d1d748e6f --- /dev/null +++ b/Documentation/translations/zh_TW/dev-tools/sparse.txt @@ -0,0 +1,91 @@ +Chinese translated version of Documentation/dev-tools/sparse.rst + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Traditional Chinese maintainer: Hu Haowen +--------------------------------------------------------------------- +Documentation/dev-tools/sparse.rst 的繁體中文翻譯 + +如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文 +交流有困難的話,也可以向繁體中文版維護者求助。如果本翻譯更新不及時或 +者翻譯存在問題,請聯繫繁體中文版維護者。 + +繁體中文版維護者: 胡皓文 Hu Haowen +繁體中文版翻譯者: 胡皓文 Hu Haowen + +以下爲正文 +--------------------------------------------------------------------- + +Copyright 2004 Linus Torvalds +Copyright 2004 Pavel Machek +Copyright 2006 Bob Copeland + +使用 sparse 工具做類型檢查 +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +"__bitwise" 是一種類型屬性,所以你應該這樣使用它: + + typedef int __bitwise pm_request_t; + + enum pm_request { + PM_SUSPEND = (__force pm_request_t) 1, + PM_RESUME = (__force pm_request_t) 2 + }; + +這樣會使 PM_SUSPEND 和 PM_RESUME 成爲位方式(bitwise)整數(使用"__force" +是因爲 sparse 會抱怨改變位方式的類型轉換,但是這裡我們確實需要強制進行轉 +換)。而且因爲所有枚舉值都使用了相同的類型,這裡的"enum pm_request"也將 +會使用那個類型做爲底層實現。 + +而且使用 gcc 編譯的時候,所有的 __bitwise/__force 都會消失,最後在 gcc +看來它們只不過是普通的整數。 + +坦白來說,你並不需要使用枚舉類型。上面那些實際都可以濃縮成一個特殊的"int +__bitwise"類型。 + +所以更簡單的辦法只要這樣做: + + typedef int __bitwise pm_request_t; + + #define PM_SUSPEND ((__force pm_request_t) 1) + #define PM_RESUME ((__force pm_request_t) 2) + +現在你就有了嚴格的類型檢查所需要的所有基礎架構。 + +一個小提醒:常數整數"0"是特殊的。你可以直接把常數零當作位方式整數使用而 +不用擔心 sparse 會抱怨。這是因爲"bitwise"(恰如其名)是用來確保不同位方 +式類型不會被弄混(小尾模式,大尾模式,cpu尾模式,或者其他),對他們來說 +常數"0"確實是特殊的。 + +獲取 sparse 工具 +~~~~~~~~~~~~~~~~ + +你可以從 Sparse 的主頁獲取最新的發布版本: + + https://www.kernel.org/pub/software/devel/sparse/dist/ + +或者,你也可以使用 git 克隆最新的 sparse 開發版本: + + git://git.kernel.org/pub/scm/devel/sparse/sparse.git + +一旦你下載了源碼,只要以普通用戶身份運行: + + make + make install + +它將會被自動安裝到你的 ~/bin 目錄下。 + +使用 sparse 工具 +~~~~~~~~~~~~~~~~ + +用"make C=1"命令來編譯內核,會對所有重新編譯的 C 文件使用 sparse 工具。 +或者使用"make C=2"命令,無論文件是否被重新編譯都會對其使用 sparse 工具。 +如果你已經編譯了內核,用後一種方式可以很快地檢查整個源碼樹。 + +make 的可選變量 CHECKFLAGS 可以用來向 sparse 工具傳遞參數。編譯系統會自 +動向 sparse 工具傳遞 -Wbitwise 參數。 + diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst index d1cf0b4d8e46d..ffcaf3272fe70 100644 --- a/Documentation/translations/zh_TW/index.rst +++ b/Documentation/translations/zh_TW/index.rst @@ -55,11 +55,11 @@ TODOList: :maxdepth: 1 process/license-rules + dev-tools/index TODOList: * doc-guide/index -* dev-tools/index * dev-tools/testing-overview * kernel-hacking/index * rust/index diff --git a/Documentation/translations/zh_TW/sparse.txt b/Documentation/translations/zh_TW/sparse.txt deleted file mode 100644 index 35d3d1d748e6f..0000000000000 --- a/Documentation/translations/zh_TW/sparse.txt +++ /dev/null @@ -1,91 +0,0 @@ -Chinese translated version of Documentation/dev-tools/sparse.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Traditional Chinese maintainer: Hu Haowen ---------------------------------------------------------------------- -Documentation/dev-tools/sparse.rst 的繁體中文翻譯 - -如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文 -交流有困難的話,也可以向繁體中文版維護者求助。如果本翻譯更新不及時或 -者翻譯存在問題,請聯繫繁體中文版維護者。 - -繁體中文版維護者: 胡皓文 Hu Haowen -繁體中文版翻譯者: 胡皓文 Hu Haowen - -以下爲正文 ---------------------------------------------------------------------- - -Copyright 2004 Linus Torvalds -Copyright 2004 Pavel Machek -Copyright 2006 Bob Copeland - -使用 sparse 工具做類型檢查 -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -"__bitwise" 是一種類型屬性,所以你應該這樣使用它: - - typedef int __bitwise pm_request_t; - - enum pm_request { - PM_SUSPEND = (__force pm_request_t) 1, - PM_RESUME = (__force pm_request_t) 2 - }; - -這樣會使 PM_SUSPEND 和 PM_RESUME 成爲位方式(bitwise)整數(使用"__force" -是因爲 sparse 會抱怨改變位方式的類型轉換,但是這裡我們確實需要強制進行轉 -換)。而且因爲所有枚舉值都使用了相同的類型,這裡的"enum pm_request"也將 -會使用那個類型做爲底層實現。 - -而且使用 gcc 編譯的時候,所有的 __bitwise/__force 都會消失,最後在 gcc -看來它們只不過是普通的整數。 - -坦白來說,你並不需要使用枚舉類型。上面那些實際都可以濃縮成一個特殊的"int -__bitwise"類型。 - -所以更簡單的辦法只要這樣做: - - typedef int __bitwise pm_request_t; - - #define PM_SUSPEND ((__force pm_request_t) 1) - #define PM_RESUME ((__force pm_request_t) 2) - -現在你就有了嚴格的類型檢查所需要的所有基礎架構。 - -一個小提醒:常數整數"0"是特殊的。你可以直接把常數零當作位方式整數使用而 -不用擔心 sparse 會抱怨。這是因爲"bitwise"(恰如其名)是用來確保不同位方 -式類型不會被弄混(小尾模式,大尾模式,cpu尾模式,或者其他),對他們來說 -常數"0"確實是特殊的。 - -獲取 sparse 工具 -~~~~~~~~~~~~~~~~ - -你可以從 Sparse 的主頁獲取最新的發布版本: - - https://www.kernel.org/pub/software/devel/sparse/dist/ - -或者,你也可以使用 git 克隆最新的 sparse 開發版本: - - git://git.kernel.org/pub/scm/devel/sparse/sparse.git - -一旦你下載了源碼,只要以普通用戶身份運行: - - make - make install - -它將會被自動安裝到你的 ~/bin 目錄下。 - -使用 sparse 工具 -~~~~~~~~~~~~~~~~ - -用"make C=1"命令來編譯內核,會對所有重新編譯的 C 文件使用 sparse 工具。 -或者使用"make C=2"命令,無論文件是否被重新編譯都會對其使用 sparse 工具。 -如果你已經編譯了內核,用後一種方式可以很快地檢查整個源碼樹。 - -make 的可選變量 CHECKFLAGS 可以用來向 sparse 工具傳遞參數。編譯系統會自 -動向 sparse 工具傳遞 -Wbitwise 參數。 - diff --git a/Makefile b/Makefile index bad16eda67e2e..59db5f9c6f494 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index fb3025396ac96..cfdf90bc8b3f8 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -80,7 +80,7 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - int ret = mc146818_get_time(tm); + int ret = mc146818_get_time(tm, 10); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-ixora-v1.2.dts index 717decda0cebd..3ac7a45016205 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-apalis-ixora-v1.2.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-apalis-ixora-v1.2.dts @@ -76,6 +76,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can1_power>; regulator-name = "can1_supply"; + startup-delay-us = <1000>; }; reg_can2_supply: regulator-can2-supply { @@ -85,6 +86,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can2_power>; regulator-name = "can2_supply"; + startup-delay-us = <1000>; }; }; diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi index 55ce87b752539..5b86b4de1a1bd 100644 --- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi @@ -592,10 +592,10 @@ <&gcc GCC_USB30_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 51 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -619,7 +619,7 @@ compatible = "qcom,sdx55-pdc", "qcom,pdc"; reg = <0x0b210000 0x30000>; qcom,pdc-ranges = <0 179 52>; - #interrupt-cells = <3>; + #interrupt-cells = <2>; interrupt-parent = <&intc>; interrupt-controller; }; diff --git a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts index a9ec1f6c1dea1..a076a1dfe41f8 100644 --- a/arch/arm/boot/dts/samsung/exynos4210-i9100.dts +++ b/arch/arm/boot/dts/samsung/exynos4210-i9100.dts @@ -527,6 +527,14 @@ regulator-name = "VT_CAM_1.8V"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + + /* + * Force-enable this regulator; otherwise the + * kernel hangs very early in the boot process + * for about 12 seconds, without apparent + * reason. + */ + regulator-always-on; }; vcclcd_reg: LDO13 { diff --git a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi index ce81e42bf5eb3..39469b708f910 100644 --- a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi +++ b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi @@ -435,6 +435,7 @@ }; &fimd { + samsung,invert-vclk; status = "okay"; }; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6062a52a084ff..b5df38c2a0067 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1037,8 +1037,12 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + bool + config ARM64_ERRATUM_2966298 bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD default y help This option adds the workaround for ARM Cortex-A520 erratum 2966298. @@ -1050,6 +1054,20 @@ config ARM64_ERRATUM_2966298 If unsure, say Y. +config ARM64_ERRATUM_3117295 + bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 3117295. + + On an affected Cortex-A510 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts b/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts index 3892ad4f639a8..4efc534b1d6e7 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-longcheer-l8150.dts @@ -88,6 +88,7 @@ #size-cells = <0>; vcc-supply = <&pm8916_l17>; + vio-supply = <&pm8916_l6>; led@0 { reg = <0>; diff --git a/arch/arm64/boot/dts/qcom/msm8916-wingtech-wt88047.dts b/arch/arm64/boot/dts/qcom/msm8916-wingtech-wt88047.dts index 8e238976ab1ce..43078b890d860 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-wingtech-wt88047.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-wingtech-wt88047.dts @@ -118,6 +118,7 @@ #size-cells = <0>; vcc-supply = <&pm8916_l16>; + vio-supply = <&pm8916_l5>; led@0 { reg = <0>; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 3c934363368c3..961ceb83a91fa 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -2085,6 +2085,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; + qcom,controlled-remotely; }; blsp_uart1: serial@78af000 { diff --git a/arch/arm64/boot/dts/qcom/msm8939.dtsi b/arch/arm64/boot/dts/qcom/msm8939.dtsi index 5a6b1942cfaa5..3fd64cafe99c5 100644 --- a/arch/arm64/boot/dts/qcom/msm8939.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8939.dtsi @@ -1661,6 +1661,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; + qcom,controlled-remotely; }; blsp_uart1: serial@78af000 { diff --git a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-mido.dts b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-mido.dts index ed95d09cedb1e..6b9245cd8b0c3 100644 --- a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-mido.dts +++ b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-mido.dts @@ -111,6 +111,7 @@ reg = <0x45>; vcc-supply = <&pm8953_l10>; + vio-supply = <&pm8953_l5>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-tissot.dts b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-tissot.dts index 61ff629c9bf34..9ac4f507e321a 100644 --- a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-tissot.dts +++ b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-tissot.dts @@ -104,6 +104,7 @@ reg = <0x45>; vcc-supply = <&pm8953_l10>; + vio-supply = <&pm8953_l5>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-vince.dts b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-vince.dts index 1a1d3f92a5116..b0588f30f8f1a 100644 --- a/arch/arm64/boot/dts/qcom/msm8953-xiaomi-vince.dts +++ b/arch/arm64/boot/dts/qcom/msm8953-xiaomi-vince.dts @@ -113,6 +113,7 @@ reg = <0x45>; vcc-supply = <&pm8953_l10>; + vio-supply = <&pm8953_l5>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 589ae40cd3cc9..f7c528ecb224b 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -2978,8 +2978,8 @@ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 8 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 9 IRQ_TYPE_LEVEL_HIGH>; + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index ec5c36425a225..8e330d2e2e224 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -3668,9 +3668,9 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 14 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 14 IRQ_TYPE_EDGE_BOTH>, <&pdc 15 IRQ_TYPE_EDGE_BOTH>, - <&pdc 17 IRQ_TYPE_EDGE_BOTH>; + <&pdc 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "hs_phy_irq", "dp_hs_phy_irq", "dm_hs_phy_irq", diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index f4381424e70a2..f9e929bfa5508 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -2560,10 +2560,10 @@ usb_prim: usb@a6f8800 { compatible = "qcom,sc8180x-dwc3", "qcom,dwc3"; reg = <0 0x0a6f8800 0 0x400>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", @@ -2634,10 +2634,10 @@ "xo"; resets = <&gcc GCC_USB30_SEC_BCR>; power-domains = <&gcc USB30_SEC_GDSC>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index e4861c61a65bd..ffc4406422ae2 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -458,6 +458,8 @@ }; &mdss0_dp3_phy { + compatible = "qcom,sc8280xp-edp-phy"; + vdda-phy-supply = <&vreg_l6b>; vdda-pll-supply = <&vreg_l3b>; diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi index ba2043d67370a..730c8351bcaa3 100644 --- a/arch/arm64/boot/dts/qcom/sdm670.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi @@ -1295,10 +1295,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 2cf1993a8190f..63f6515692e8c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4084,10 +4084,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -4135,10 +4135,10 @@ <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 32045fb481575..f7e35e2200182 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3592,10 +3592,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -3645,10 +3645,10 @@ <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts index 5d7d567283e52..4237f2ee8fee3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts @@ -26,9 +26,11 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; + motorcomm,auto-sleep-disabled; motorcomm,clk-out-frequency-hz = <125000000>; motorcomm,keep-pll-enabled; - motorcomm,auto-sleep-disabled; + motorcomm,rx-clk-drv-microamp = <5020>; + motorcomm,rx-data-drv-microamp = <5020>; pinctrl-0 = <ð_phy_reset_pin>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 5544f66c6ff41..4aa516ff156df 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -890,6 +890,7 @@ reg = ; clocks = <&cru PCLK_PHP_ROOT>, <&cru ACLK_USB_ROOT>, + <&cru ACLK_USB>, <&cru HCLK_USB_ROOT>, <&cru HCLK_HOST0>, <&cru HCLK_HOST_ARB0>, diff --git a/arch/arm64/boot/dts/sprd/ums512.dtsi b/arch/arm64/boot/dts/sprd/ums512.dtsi index 024be594c47d1..97ac550af2f11 100644 --- a/arch/arm64/boot/dts/sprd/ums512.dtsi +++ b/arch/arm64/boot/dts/sprd/ums512.dtsi @@ -96,7 +96,7 @@ CPU6: cpu@600 { device_type = "cpu"; - compatible = "arm,cortex-a55"; + compatible = "arm,cortex-a75"; reg = <0x0 0x600>; enable-method = "psci"; cpu-idle-states = <&CORE_PD>; @@ -104,7 +104,7 @@ CPU7: cpu@700 { device_type = "cpu"; - compatible = "arm,cortex-a55"; + compatible = "arm,cortex-a75"; reg = <0x0 0x700>; enable-method = "psci"; cpu-idle-states = <&CORE_PD>; diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh index 7399d706967a4..9b7a09808a3dd 100755 --- a/arch/arm64/boot/install.sh +++ b/arch/arm64/boot/install.sh @@ -17,7 +17,8 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) -if [ "$(basename $2)" = "Image.gz" ]; then +if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ] +then # Compressed install echo "Installing compressed kernel" base=vmlinuz diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5706e74c55786..87787a012bea8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -432,6 +432,19 @@ static struct midr_range broken_aarch32_aes[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD +static const struct midr_range erratum_spec_unpriv_load_list[] = { +#ifdef CONFIG_ARM64_ERRATUM_3117295 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + /* Cortex-A520 r0p0 to r0p1 */ + MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), +#endif + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -730,12 +743,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_2966298 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { - .desc = "ARM erratum 2966298", - .capability = ARM64_WORKAROUND_2966298, + .desc = "ARM errata 2966298, 3117295", + .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list), }, #endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a6030913cd58c..7fcbee0f6c0e4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,16 +428,9 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 -alternative_if ARM64_WORKAROUND_2966298 - tlbi vale1, xzr - dsb nsh -alternative_else_nop_endif -alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 - ldr lr, [sp, #S_LR] - add sp, sp, #PT_REGS_SIZE // restore sp - eret -alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0 + msr far_el1, x29 ldr_this_cpu x30, this_cpu_vector, x29 @@ -446,7 +439,18 @@ alternative_else_nop_endif ldr lr, [sp, #S_LR] // restore x30 add sp, sp, #PT_REGS_SIZE // restore sp br x29 + +.L_skip_tramp_exit_\@: #endif + ldr lr, [sp, #S_LR] + add sp, sp, #PT_REGS_SIZE // restore sp + + /* This must be after the last explicit memory access */ +alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif + eret .else ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 91e44ac7150f9..f9b3adebcb187 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1280,8 +1280,10 @@ void fpsimd_release_task(struct task_struct *dead_task) */ void sme_alloc(struct task_struct *task, bool flush) { - if (task->thread.sme_state && flush) { - memset(task->thread.sme_state, 0, sme_state_size(task)); + if (task->thread.sme_state) { + if (flush) + memset(task->thread.sme_state, 0, + sme_state_size(task)); return; } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index dea3dc89234b0..5511bee15603a 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,7 +84,6 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 -WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE @@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT +WORKAROUND_SPECULATIVE_UNPRIV_LOAD diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ef35c871244f0..42e3a0e18956f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -504,8 +504,9 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); + rcu_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 5582a4ca1e9e3..7aa2c2360ff60 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_MIPS_FP_SUPPORT @@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state) struct cpuinfo_mips *c = &boot_cpu_data; struct task_struct *t = current; + /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case + * we are preempted before the lose_fpu(0) in start_thread. + */ + lose_fpu(0); + t->thread.fpu.fcr31 = c->fpu_csr31; switch (state->nan_2008) { case 0: diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index a3cf293658581..0c45767eacf67 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -108,10 +108,9 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (cpu_has_mipsmt) { - lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops = vsmp_smp_ops; + if (cpu_has_mipsmt) lantiq_smp_ops.init_secondary = lantiq_init_secondary; - register_smp_ops(&lantiq_smp_ops); - } + register_smp_ops(&lantiq_smp_ops); #endif } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 5dcb525a89954..6e368a4658b54 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -422,7 +422,12 @@ void __init paging_init(void) (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; } + + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; +#else + max_mapnr = max_low_pfn; #endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -458,13 +463,6 @@ void __init mem_init(void) */ BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); -#ifdef CONFIG_HIGHMEM - max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; -#else - max_mapnr = max_low_pfn; -#endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - maar_init(); memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 81078abec521a..56e694f6acc2d 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -123,10 +123,10 @@ static unsigned long f_extend(unsigned long address) #ifdef CONFIG_64BIT if(unlikely(parisc_narrow_firmware)) { if((address & 0xff000000) == 0xf0000000) - return 0xf0f0f0f000000000UL | (u32)address; + return (0xfffffff0UL << 32) | (u32)address; if((address & 0xf0000000) == 0xf0000000) - return 0xffffffff00000000UL | (u32)address; + return (0xffffffffUL << 32) | (u32)address; } #endif return address; diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2b175ddf82f0b..aa8bb0208bcc8 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -24,6 +24,7 @@ CONFIG_PS3_VRAM=m CONFIG_PS3_LPM=m # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_KEXEC=y +# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set CONFIG_PPC_4K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_PM=y diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index b2ba3f79cfe9a..511cb385be96b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -880,7 +880,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #ifdef CONFIG_COMPAT -#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) +#define TASK_SIZE_32 (_AC(0x80000000, UL)) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #else diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3e23e1786d052..4f6af8c6cfa06 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -15,7 +15,7 @@ #ifdef CONFIG_64BIT #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) -#define STACK_TOP_MAX TASK_SIZE_64 +#define STACK_TOP_MAX TASK_SIZE #define arch_get_mmap_end(addr, len, flags) \ ({ \ diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c index 68e786c84c949..f6d4dedffb842 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa) if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || IS_ENABLED(CONFIG_CMDLINE_FORCE) || fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { - strncat(early_cmdline, CONFIG_CMDLINE, - COMMAND_LINE_SIZE - fdt_cmdline_size); + strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); } return early_cmdline; diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index c773820e4af90..c6fe5405de4a4 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -597,7 +597,9 @@ static int ctr_aes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr, + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); + cpacf_kmctr(sctx->fc, sctx->key, buf, buf, AES_BLOCK_SIZE, walk.iv); memcpy(walk.dst.virt.addr, buf, nbytes); crypto_inc(walk.iv, AES_BLOCK_SIZE); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 8b541e44151d4..55ee5567a5ea9 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -693,9 +693,11 @@ static int ctr_paes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); while (1) { if (cpacf_kmctr(ctx->fc, ¶m, buf, - walk.src.virt.addr, AES_BLOCK_SIZE, + buf, AES_BLOCK_SIZE, walk.iv) == AES_BLOCK_SIZE) break; if (__paes_convert_key(ctx)) diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 3be293335de54..7a788d44cc734 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -1220,7 +1220,7 @@ static int __init arch_setup(void) lcdc_info.ch[0].num_modes = ARRAY_SIZE(ecovec_dvi_modes); /* No backlight */ - gpio_backlight_data.fbdev = NULL; + gpio_backlight_data.dev = NULL; gpio_set_value(GPIO_PTA2, 1); gpio_set_value(GPIO_PTU1, 1); diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 21f9407be5d35..7e88705e907f4 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); ,,regs->di,,regs->si,,regs->dx \ ,,regs->r10,,regs->r8,,regs->r9) \ + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + /* Mapping of registers to parameters for syscalls on i386 */ #define SC_IA32_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ - ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ - ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ #define __SYS_STUB0(abi, name) \ long __##abi##_##name(const struct pt_regs *regs); \ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1648aa0204d97..046bc9d57e996 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1438,7 +1438,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { - if (unlikely(mc146818_get_time(&curr_time) < 0)) { + if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 1309b9b053386..2e7066980f3e8 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -67,7 +67,7 @@ void mach_get_cmos_time(struct timespec64 *now) return; } - if (mc146818_get_time(&tm)) { + if (mc146818_get_time(&tm, 1000)) { pr_err("Unable to read current time from RTC\n"); now->tv_sec = now->tv_nsec = 0; return; diff --git a/block/ioctl.c b/block/ioctl.c index a74ef911e2797..d1d8e8391279a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -20,8 +20,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition p; sector_t start, length; - if (disk->flags & GENHD_FL_NO_PART) - return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) diff --git a/block/partitions/core.c b/block/partitions/core.c index e137a87f4db0d..e58c8b50350bd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -458,6 +458,11 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, goto out; } + if (disk->flags & GENHD_FL_NO_PART) { + ret = -EINVAL; + goto out; + } + if (partition_overlaps(disk, start, length, -1)) { ret = -EBUSY; goto out; diff --git a/crypto/algapi.c b/crypto/algapi.c index 4fe95c4480473..85bc279b4233f 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -341,6 +341,7 @@ __crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put) } if (!strcmp(q->cra_driver_name, alg->cra_name) || + !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f85f3515c258f..9c5a5f4dba5a6 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * device_resume_noirq - Execute a "noirq resume" callback for given device. + * __device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -655,7 +655,13 @@ Skip: Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; + + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -811,21 +807,31 @@ Out: pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); - while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); + get_device(dev); - if (!is_async(dev)) { - int error; - mutex_unlock(&dpm_list_mtx); + mutex_unlock(&dpm_list_mtx); + + device_resume(dev); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + mutex_lock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 72b7a92337b18..cd6e559648b21 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - if (mc146818_get_time(&time) < 0) { + if (mc146818_get_time(&time, 1000) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b6414e1e645b7..aa65313aabb8d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -510,7 +510,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, struct iov_iter *iter, int msg_flags, int *sent) { int result; - struct msghdr msg; + struct msghdr msg = {} ; unsigned int noreclaim_flag; if (unlikely(!sock)) { @@ -526,10 +526,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; sock->sk->sk_use_task_frag = false; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a999b698b131f..1e2596c5efd81 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req) static void rbd_lock_del_request(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; - bool need_wakeup; + bool need_wakeup = false; lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); - rbd_assert(!list_empty(&img_req->lock_item)); - list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && - list_empty(&rbd_dev->running_list)); + if (!list_empty(&img_req->lock_item)) { + list_del_init(&img_req->lock_item); + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + list_empty(&rbd_dev->running_list)); + } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) complete(&rbd_dev->releasing_wait); @@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result) return; } - list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { + while (!list_empty(&rbd_dev->acquiring_list)) { + img_req = list_first_entry(&rbd_dev->acquiring_list, + struct rbd_img_request, lock_item); mutex_lock(&img_req->state_mutex); rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); + if (!result) + list_move_tail(&img_req->lock_item, + &rbd_dev->running_list); + else + list_del_init(&img_req->lock_item); rbd_img_schedule(img_req, result); mutex_unlock(&img_req->state_mutex); } - - list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } static bool locker_equal(const struct ceph_locker *lhs, diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index dcf627b36e829..d6653cbcf94a2 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -268,7 +268,8 @@ static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl, static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr) { - return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len; + return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len && + !(addr & (sizeof(struct mhi_ring_element) - 1)); } int mhi_destroy_device(struct device *dev, void *data) @@ -642,6 +643,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, mhi_del_ring_element(mhi_cntrl, tre_ring); local_rp = tre_ring->rp; + read_unlock_bh(&mhi_chan->lock); + /* notify client */ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result); @@ -667,6 +670,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, kfree(buf_info->cb_buf); } } + + read_lock_bh(&mhi_chan->lock); } break; } /* CC_EOT */ @@ -1122,17 +1127,15 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) return -EIO; - read_lock_irqsave(&mhi_cntrl->pm_lock, flags); - ret = mhi_is_ring_full(mhi_cntrl, tre_ring); - if (unlikely(ret)) { - ret = -EAGAIN; - goto exit_unlock; - } + if (unlikely(ret)) + return -EAGAIN; ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags); if (unlikely(ret)) - goto exit_unlock; + return ret; + + read_lock_irqsave(&mhi_cntrl->pm_lock, flags); /* Packet is queued, take a usage ref to exit M3 if necessary * for host->device buffer, balanced put is done on buffer completion @@ -1152,7 +1155,6 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (dir == DMA_FROM_DEVICE) mhi_cntrl->runtime_put(mhi_cntrl); -exit_unlock: read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags); return ret; @@ -1204,6 +1206,9 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, int eot, eob, chain, bei; int ret; + /* Protect accesses for reading and incrementing WP */ + write_lock_bh(&mhi_chan->lock); + buf_ring = &mhi_chan->buf_ring; tre_ring = &mhi_chan->tre_ring; @@ -1221,8 +1226,10 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, if (!info->pre_mapped) { ret = mhi_cntrl->map_single(mhi_cntrl, buf_info); - if (ret) + if (ret) { + write_unlock_bh(&mhi_chan->lock); return ret; + } } eob = !!(flags & MHI_EOB); @@ -1239,6 +1246,8 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, mhi_add_ring_element(mhi_cntrl, tre_ring); mhi_add_ring_element(mhi_cntrl, buf_ring); + write_unlock_bh(&mhi_chan->lock); + return 0; } diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 420f155d251fb..a3bbdd6e60fca 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -23,10 +23,13 @@ #include #include #include +#include #include #define RNG_MODULE_NAME "hw_random" +#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) + static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; @@ -58,7 +61,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static size_t rng_buffer_size(void) { - return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; + return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) @@ -209,6 +212,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -236,34 +240,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; + } else if (bytes_read == 0 && + (filp->f_flags & O_NONBLOCK)) { + err = -EAGAIN; + goto out_unlock_reading; } + data_avail = bytes_read; } - if (!data_avail) { - if (filp->f_flags & O_NONBLOCK) { - err = -EAGAIN; - goto out_unlock_reading; - } - } else { - len = data_avail; + len = data_avail; + if (len) { if (len > size) len = size; data_avail -= len; - if (copy_to_user(buf + ret, rng_buffer + data_avail, - len)) { + memcpy(buffer, rng_buffer + data_avail, len); + } + mutex_unlock(&reading_mutex); + put_rng(rng); + + if (len) { + if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); - put_rng(rng); if (need_resched()) schedule_timeout_interruptible(1); @@ -274,6 +281,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1f6186475715e..1791d37fbc53c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1232,14 +1232,13 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); - value = READ_ONCE(cpudata->cppc_req_cached); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dc50c9fb488df..c352a593e5d86 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -526,6 +526,30 @@ static int intel_pstate_cppc_get_scaling(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ +static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, + unsigned int relation) +{ + if (freq == cpu->pstate.turbo_freq) + return cpu->pstate.turbo_pstate; + + if (freq == cpu->pstate.max_freq) + return cpu->pstate.max_pstate; + + switch (relation) { + case CPUFREQ_RELATION_H: + return freq / cpu->pstate.scaling; + case CPUFREQ_RELATION_C: + return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); + } + + return DIV_ROUND_UP(freq, cpu->pstate.scaling); +} + +static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) +{ + return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); +} + /** * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. @@ -543,6 +567,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; + int freq; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); @@ -556,16 +581,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - cpu->pstate.max_pstate_physical = - DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, - scaling); + freq = perf_ctl_max_phys * perf_ctl_scaling; + cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); - cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; + freq = cpu->pstate.min_pstate * perf_ctl_scaling; + cpu->pstate.min_freq = freq; /* * Cast the min P-state value retrieved via pstate_funcs.get_min() to * the effective range of HWP performance levels. */ - cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); + cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } static inline void update_turbo_state(void) @@ -2528,13 +2553,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * abstract values to represent performance rather than pure ratios. */ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; int freq; freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); + max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); + min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -2908,18 +2932,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); - switch (relation) { - case CPUFREQ_RELATION_L: - target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); - break; - case CPUFREQ_RELATION_H: - target_pstate = freqs.new / cpu->pstate.scaling; - break; - default: - target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); - break; - } - + target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2937,7 +2950,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 472bd510b5e27..6ebd12f7970bc 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -525,7 +525,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; struct resource *res; - u32 remainder = 0; + u64 remainder = 0; lockdep_assert_held_write(&cxl_region_rwsem); @@ -545,7 +545,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; - div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); if (remainder) return -EINVAL; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 474d81831ad36..81d9df89dde6b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1688,7 +1688,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1697,7 +1697,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1707,31 +1707,52 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", + df->freq_table[i]); + } + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; - len += sprintf(buf + len, " time(ms)\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + len += scnprintf(buf + len, PAGE_SIZE - len, "*"); else - len += sprintf(buf + len, " "); + len += scnprintf(buf + len, PAGE_SIZE - len, " "); + if (len >= PAGE_SIZE - 1) + break; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", + df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", + df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", - df->stats.trans_table[(i * max_state) + j]); + if (len < PAGE_SIZE - 1) + len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", + df->stats.total_trans); - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b7388ae62d7f1..491b222402216 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1103,6 +1103,9 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_register); static void __dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan) { + if (chan->local == NULL) + return; + WARN_ONCE(!device->device_release && chan->client_count, "%s called while %d clients hold a reference\n", __func__, chan->client_count); diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 00cb70aca34a3..30df55da4dbb9 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -26,6 +26,8 @@ #define ARGS_RX BIT(0) #define ARGS_REMOTE BIT(1) #define ARGS_MULTI_FIFO BIT(2) +#define ARGS_EVEN_CH BIT(3) +#define ARGS_ODD_CH BIT(4) static void fsl_edma_synchronize(struct dma_chan *chan) { @@ -159,6 +161,12 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE; fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO; + if ((dma_spec->args[2] & ARGS_EVEN_CH) && (i & 0x1)) + continue; + + if ((dma_spec->args[2] & ARGS_ODD_CH) && !(i & 0x1)) + continue; + if (!b_chmux && i == dma_spec->args[0]) { chan = dma_get_slave_channel(chan); chan->device->privatecnt++; diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8f754f922217d..fa0f880beae64 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -802,6 +802,9 @@ err_bmap: static void idxd_device_evl_free(struct idxd_device *idxd) { + void *evl_log; + unsigned int evl_log_size; + dma_addr_t evl_dma; union gencfg_reg gencfg; union genctrl_reg genctrl; struct device *dev = &idxd->pdev->dev; @@ -822,11 +825,15 @@ static void idxd_device_evl_free(struct idxd_device *idxd) iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); - dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); bitmap_free(evl->bmap); + evl_log = evl->log; + evl_log_size = evl->log_size; + evl_dma = evl->dma; evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; spin_unlock(&evl->lock); + + dma_free_coherent(dev, evl_log_size, evl_log, evl_dma); } static void idxd_group_config_write(struct idxd_group *group) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index c46dc5215af7a..00b165d1f502d 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); /* declarations for message passing transports */ struct scmi_msg_payld; diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 19246ed1f01ff..b8d470417e8f9 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -45,6 +45,20 @@ static void rx_callback(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + /* + * An A2P IRQ is NOT valid when received while the platform still has + * the ownership of the channel, because the platform at first releases + * the SMT channel and then sends the completion interrupt. + * + * This addresses a possible race condition in which a spurious IRQ from + * a previous timed-out reply which arrived late could be wrongly + * associated with the next pending transaction. + */ + if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { + dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); + return; + } + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); } diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index e887fd1690434..dd344506b0a37 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -347,8 +347,8 @@ process_response_opp(struct scmi_opp *opp, unsigned int loop_idx, } static inline void -process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, - unsigned int loop_idx, +process_response_opp_v4(struct device *dev, struct perf_dom_info *dom, + struct scmi_opp *opp, unsigned int loop_idx, const struct scmi_msg_resp_perf_describe_levels_v4 *r) { opp->perf = le32_to_cpu(r->opp[loop_idx].perf_val); @@ -359,10 +359,23 @@ process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, /* Note that PERF v4 reports always five 32-bit words */ opp->indicative_freq = le32_to_cpu(r->opp[loop_idx].indicative_freq); if (dom->level_indexing_mode) { + int ret; + opp->level_index = le32_to_cpu(r->opp[loop_idx].level_index); - xa_store(&dom->opps_by_idx, opp->level_index, opp, GFP_KERNEL); - xa_store(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + ret = xa_insert(&dom->opps_by_idx, opp->level_index, opp, + GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_idx at %d - ret:%d\n", + opp->level_index, ret); + + ret = xa_insert(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_lvl at %d - ret:%d\n", + opp->perf, ret); + hash_add(dom->opps_by_freq, &opp->hash, opp->indicative_freq); } } @@ -379,7 +392,7 @@ iter_perf_levels_process_response(const struct scmi_protocol_handle *ph, if (PROTOCOL_REV_MAJOR(p->version) <= 0x3) process_response_opp(opp, st->loop_idx, response); else - process_response_opp_v4(p->perf_dom, opp, st->loop_idx, + process_response_opp_v4(ph->dev, p->perf_dom, opp, st->loop_idx, response); p->perf_dom->opp_count++; diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 0493aa3c12bf5..3505735185033 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -1111,7 +1111,6 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, int i; for (i = 0; i < num_chans; i++) { - void *xret; struct scmi_raw_queue *q; q = scmi_raw_queue_init(raw); @@ -1120,13 +1119,12 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, goto err_xa; } - xret = xa_store(&raw->chans_q, channels[i], q, + ret = xa_insert(&raw->chans_q, channels[i], q, GFP_KERNEL); - if (xa_err(xret)) { + if (ret) { dev_err(dev, "Fail to allocate Raw queue 0x%02X\n", channels[i]); - ret = xa_err(xret); goto err_xa; } } @@ -1322,6 +1320,12 @@ void scmi_raw_message_report(void *r, struct scmi_xfer *xfer, dev = raw->handle->dev; q = scmi_raw_queue_select(raw, idx, SCMI_XFER_IS_CHAN_SET(xfer) ? chan_id : 0); + if (!q) { + dev_warn(dev, + "RAW[%d] - NO queue for chan 0x%X. Dropping report.\n", + idx, chan_id); + return; + } /* * Grab the msg_q_lock upfront to avoid a possible race between diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 87b4f4d35f062..517d52fb3bcbb 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); } + +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) +{ + return (ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); +} diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 82fcfd29bc4d2..3c197db42c9d9 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -128,4 +128,4 @@ unlock_mutex: } /* must execute after PCI subsystem for EFI quirks */ -subsys_initcall_sync(sysfb_init); +device_initcall(sysfb_init); diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index 5320cf1de89c4..b24e349deed5e 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -321,20 +321,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } break; default: return -ENOTSUPP; @@ -346,20 +353,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } break; default: return -ENOTSUPP; @@ -373,29 +387,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: @@ -408,29 +427,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 5d04720107ef5..4ab33d55aec47 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1675,6 +1675,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "INT33FC:00@3", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 0.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "PNP0C50:00@8", + }, + }, {} /* Terminating entry */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 635b58553583b..2c35036e4ba25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2197,8 +2197,6 @@ retry_init: pci_wake_from_d3(pdev, TRUE); - pci_wake_from_d3(pdev, TRUE); - /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 306252cd67fd7..c2b9dfc6451d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3989,16 +3989,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); - /* don't check this. There are apparently firmwares in the wild with - * incorrect size in the header - */ - if (err == -ENODEV) - goto out; + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) - dev_dbg(adev->dev, - "gfx10: amdgpu_ucode_request() failed \"%s\"\n", - fw_name); + goto out; + + /* don't validate this firmware. There are apparently firmwares + * in the wild with incorrect size in the header + */ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); @@ -6575,7 +6572,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b346eb0a0db11..d0c3ec9f4fb67 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3807,7 +3807,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); @@ -6353,6 +6353,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) + continue; mask = 1; counter = 0; gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8b7fed9135269..22cbfa1bdaddb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 15277f1d5cf0a..d722cbd317834 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 87a1000b85729..cf0834ae53466 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -956,6 +956,11 @@ int dm_helper_dmub_aux_transfer_sync( struct aux_payload *payload, enum aux_return_code_type *operation_result) { + if (!link->hpd_status) { + *operation_result = AUX_RET_ERROR_HPD_DISCON; + return -1; + } + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7326b75658461..2618504e260e4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -131,30 +131,27 @@ static int dcn314_get_active_display_cnt_wa( return display_count; } -static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } @@ -252,11 +249,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn314_disable_otg_wa(clk_mgr_base, context, true); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn314_disable_otg_wa(clk_mgr_base, context, false); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 35d087cf1980f..a96f0747628cc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -873,11 +873,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; struct dc_stream_state *stream = pipe_ctx->stream; - DC_LOGGER_INIT(dsc->ctx->logger); - if (!pipe_ctx->stream->timing.flags.DSC || !dsc) + if (!pipe_ctx->stream->timing.flags.DSC) return false; + if (!dsc) + return false; + + DC_LOGGER_INIT(dsc->ctx->logger); + if (enable) { struct dsc_config dsc_cfg; uint8_t dsc_packed_pps[128]; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c index 5c9a30211c109..fc50931c2aecb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c @@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 13c3d7ff61395..6f64aab18f07b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -920,8 +920,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state) bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream) { /* To-do: Setup Replay */ - struct dc *dc = link->ctx->dc; - struct dmub_replay *replay = dc->res_pool->replay; + struct dc *dc; + struct dmub_replay *replay; int i; unsigned int panel_inst; struct replay_context replay_context = { 0 }; @@ -937,6 +937,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream if (!link) return false; + dc = link->ctx->dc; + + replay = dc->res_pool->replay; + if (!replay) return false; @@ -965,8 +969,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream replay_context.line_time_in_ns = lineTimeInNs; - if (replay) - link->replay_settings.replay_feature_enabled = + link->replay_settings.replay_feature_enabled = replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); if (link->replay_settings.replay_feature_enabled) { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 2b3d5183818ac..3af67b729f6c7 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -841,6 +841,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) isPSRSUSupported = false; else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b47fd42414f46..56e4c312cb7a9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -741,16 +742,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 1)) || (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 3))) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index aa4a5498a12f7..123c19bb62280 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1441,10 +1441,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 5355f621388bb..c097aed4722b9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1377,10 +1377,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 51abe42c639e5..5168628f11cff 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1741,6 +1741,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1757,6 +1758,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2453,7 +2455,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2647,6 +2651,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 5af819611ebce..80d3fb4e985f1 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -471,6 +471,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index d81920227a8ae..7c0076e499533 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -54,13 +54,13 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr, int ret; ret = i2c_master_send(ptn_bridge->client, &addr, 1); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } ret = i2c_master_recv(ptn_bridge->client, buf, len); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret); return ret; } @@ -78,7 +78,7 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr, buf[1] = val; ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf)); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); return ret; } diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 541e4f5afc4c8..14d4dcf239da8 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -107,6 +107,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -345,11 +346,20 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + goto exit; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); +exit: + mutex_unlock(&ps_bridge->aux_lock); + return ret; } @@ -470,7 +480,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge, ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -619,6 +640,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 19bdb32dbc9aa..f24666b481938 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -941,10 +941,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); reg &= ~DSIM_STOP_STATE_CNT_MASK; reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]); - - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - reg |= DSIM_FORCE_STOP_STATE; - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff); @@ -1401,18 +1397,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi) disable_irq(dsi->irq); } -static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable) -{ - u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); - - if (enable) - reg |= DSIM_FORCE_STOP_STATE; - else - reg &= ~DSIM_FORCE_STOP_STATE; - - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); -} - static int samsung_dsim_init(struct samsung_dsim *dsi) { const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; @@ -1462,9 +1446,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge, ret = samsung_dsim_init(dsi); if (ret) return; - - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); } } @@ -1473,12 +1454,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge, { struct samsung_dsim *dsi = bridge_to_dsi(bridge); - if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) { - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); - } else { - samsung_dsim_set_stop_state(dsi, false); - } + samsung_dsim_set_display_mode(dsi); + samsung_dsim_set_display_enable(dsi, true); dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1491,9 +1468,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge, if (!(dsi->state & DSIM_STATE_ENABLED)) return; - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - samsung_dsim_set_stop_state(dsi, true); - dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1795,8 +1769,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host, if (ret) return ret; - samsung_dsim_set_stop_state(dsi, false); - ret = mipi_dsi_create_packet(&xfer.packet, msg); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 2bdc5b439bebd..4560ae9cbce15 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,6 +1080,26 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; + + i2c_set_clientdata(sii902x->i2c, sii902x); + + sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, + 1, 0, I2C_MUX_GATE, + sii902x_i2c_bypass_select, + sii902x_i2c_bypass_deselect); + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } + + sii902x->i2cmux->priv = sii902x; + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + goto err_unreg_audio; + sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; sii902x->bridge.timings = &default_sii902x_timings; @@ -1090,19 +1110,13 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); - - i2c_set_clientdata(sii902x->i2c, sii902x); + return 0; - sii902x->i2cmux = i2c_mux_alloc(sii902x->i2c->adapter, dev, - 1, 0, I2C_MUX_GATE, - sii902x_i2c_bypass_select, - sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); - sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + return ret; } static int sii902x_probe(struct i2c_client *client) @@ -1170,12 +1184,14 @@ static int sii902x_probe(struct i2c_client *client) } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index d8b2955e88fd0..afb02aae707b4 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -241,7 +241,8 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF); iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF); - if (!iter->clips || !drm_rect_equals(&state->src, &old_state->src)) { + if (!iter->clips || state->ignore_damage_clips || + !drm_rect_equals(&state->src, &old_state->src)) { iter->clips = NULL; iter->num_clips = 0; iter->full_update = true; diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 24e7998d17313..311e179904a2a 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -678,6 +678,19 @@ int drm_mode_getplane_res(struct drm_device *dev, void *data, !file_priv->universal_planes) continue; + /* + * If we're running on a virtualized driver then, + * unless userspace advertizes support for the + * virtualized cursor plane, disable cursor planes + * because they'll be broken due to missing cursor + * hotspot info. + */ + if (plane->type == DRM_PLANE_TYPE_CURSOR && + drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) && + file_priv->atomic && + !file_priv->supports_virtualized_cursor_plane) + continue; + if (drm_lease_held(file_priv, plane->base.id)) { if (count < plane_resp->count_planes && put_user(plane->base.id, plane_ptr + count)) @@ -1387,6 +1400,7 @@ retry: out: if (fb) drm_framebuffer_put(fb); + fb = NULL; if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 4d986077738b9..bce027552474a 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8dde7b1e9b35d..5bdc246f5fad0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 34cdabc30b4f5..5302bebbe38c9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1342,7 +1342,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 5b8efe8e735a9..f7113b0321e4f 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 97d5eef10130d..5cf3db7058b98 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -674,7 +674,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) val |= EDP_PSR_IDLE_FRAMES(psr_compute_idle_frames(intel_dp)); - val |= EDP_PSR_MAX_SLEEP_TIME(max_sleep_time); + if (DISPLAY_VER(dev_priv) < 20) + val |= EDP_PSR_MAX_SLEEP_TIME(max_sleep_time); + if (IS_HASWELL(dev_priv)) val |= EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES; @@ -1398,9 +1400,21 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * can rely on frontbuffer tracking. */ mask = EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP | - EDP_PSR_DEBUG_MASK_MAX_SLEEP; + EDP_PSR_DEBUG_MASK_HPD; + + /* + * For some unknown reason on HSW non-ULT (or at least on + * Dell Latitude E6540) external displays start to flicker + * when PSR is enabled on the eDP. SR/PC6 residency is much + * higher than should be possible with an external display. + * As a workaround leave LPSP unmasked to prevent PSR entry + * when external displays are active. + */ + if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv)) + mask |= EDP_PSR_DEBUG_MASK_LPSP; + + if (DISPLAY_VER(dev_priv) < 20) + mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP; /* * No separate pipe reg write mask on hsw/bdw, so have to unmask all diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index a6602c0126715..3dda885df5b22 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 95c8472d878a9..7dc6fb7308ce3 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -973,6 +973,8 @@ static const struct panel_desc auo_b116xak01 = { }, .delay = { .hpd_absent = 200, + .unprepare = 500, + .enable = 50, }, }; @@ -1840,7 +1842,8 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x145c, &delay_200_500_e50, "B116XAB01.4"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02.3"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), @@ -1848,8 +1851,10 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x0786, &delay_200_500_p2e80, "NV116WHM-T01"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x09c3, &delay_200_500_e50, "NT116WHM-N21,836X2"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0979, &delay_200_500_e50, "NV116WHM-N49 V8.0"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"), diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c index ea5a857793827..f23d8832a1ad0 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c @@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = { .off_func = s6d7aa0_lsl080al02_off, .drm_mode = &s6d7aa0_lsl080al02_mode, .mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP, - .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .bus_flags = 0, .has_backlight = false, .use_passwd3 = false, diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 6e46e55d29a9a..51f838befb321 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3782,6 +3782,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3794,6 +3795,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index b30ede1cf62d3..91930e84a9cd2 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -283,7 +283,7 @@ static const struct drm_ioctl_desc qxl_ioctls[] = { }; static struct drm_driver qxl_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .dumb_create = qxl_mode_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index 5e5e466f35d10..7c78c074e3a2e 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -169,13 +169,13 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc, struct tidss_device *tidss = to_tidss(ddev); unsigned long flags; - dev_dbg(ddev->dev, - "%s: %s enabled %d, needs modeset %d, event %p\n", __func__, - crtc->name, drm_atomic_crtc_needs_modeset(crtc->state), - crtc->state->enable, crtc->state->event); + dev_dbg(ddev->dev, "%s: %s is %sactive, %s modeset, event %p\n", + __func__, crtc->name, crtc->state->active ? "" : "not ", + drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need", + crtc->state->event); /* There is nothing to do if CRTC is not going to be enabled. */ - if (!crtc->state->enable) + if (!crtc->state->active) return; /* diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index 047b958123341..cd9e66a06596a 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -182,7 +182,7 @@ DEFINE_DRM_GEM_FOPS(vbox_fops); static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, + DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .fops = &vbox_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 644b8ee51009b..148f09aaf99a7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -177,7 +177,7 @@ static const struct drm_driver driver = { * out via drm_device::driver_features: */ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC | - DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE | DRIVER_CURSOR_HOTSPOT, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a2e045f3a0004..a1ef657eba077 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -79,6 +79,8 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, + plane); bool is_cursor = plane->type == DRM_PLANE_TYPE_CURSOR; struct drm_crtc_state *crtc_state; int ret; @@ -86,6 +88,14 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) return 0; + /* + * Ignore damage clips if the framebuffer attached to the plane's state + * has changed since the last plane update (page-flip). In this case, a + * full plane update should happen because uploads are done per-buffer. + */ + if (old_plane_state->fb != new_plane_state->fb) + new_plane_state->ignore_damage_clips = true; + crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); if (IS_ERR(crtc_state)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8b24ecf60e3ec..d3e308fdfd5be 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1611,7 +1611,7 @@ static const struct file_operations vmwgfx_driver_fops = { static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM, + DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM | DRIVER_CURSOR_HOTSPOT, .ioctls = vmw_ioctls, .num_ioctls = ARRAY_SIZE(vmw_ioctls), .master_set = vmw_master_set, diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 0e5d3d2e9c985..76002b91c86a4 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -28,6 +29,7 @@ #define AD7091R_REG_RESULT_CONV_RESULT(x) ((x) & 0xfff) /* AD7091R_REG_CONF */ +#define AD7091R_REG_CONF_ALERT_EN BIT(4) #define AD7091R_REG_CONF_AUTO BIT(8) #define AD7091R_REG_CONF_CMD BIT(10) @@ -49,6 +51,27 @@ struct ad7091r_state { struct mutex lock; /*lock to prevent concurent reads */ }; +const struct iio_event_spec ad7091r_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), + }, +}; +EXPORT_SYMBOL_NS_GPL(ad7091r_events, IIO_AD7091R); + static int ad7091r_set_mode(struct ad7091r_state *st, enum ad7091r_mode mode) { int ret, conf; @@ -168,8 +191,142 @@ unlock: return ret; } +static int ad7091r_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int val, ret; + + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_HIGH_LIMIT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_LOW_LIMIT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, int state) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + if (state) { + return regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN); + } else { + /* + * Set thresholds either to 0 or to 2^12 - 1 as appropriate to + * prevent alerts and thus disable event generation. + */ + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + AD7091R_HIGH_LIMIT); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + AD7091R_LOW_LIMIT); + default: + return -EINVAL; + } + } +} + +static int ad7091r_read_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int *val, int *val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + ret = regmap_read(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int val, int val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + return regmap_write(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + default: + return -EINVAL; + } +} + static const struct iio_info ad7091r_info = { .read_raw = ad7091r_read_raw, + .read_event_config = &ad7091r_read_event_config, + .write_event_config = &ad7091r_write_event_config, + .read_event_value = &ad7091r_read_event_value, + .write_event_value = &ad7091r_write_event_value, }; static irqreturn_t ad7091r_event_handler(int irq, void *private) @@ -232,6 +389,11 @@ int ad7091r_probe(struct device *dev, const char *name, iio_dev->channels = chip_info->channels; if (irq) { + ret = regmap_update_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN, BIT(4)); + if (ret) + return ret; + ret = devm_request_threaded_irq(dev, irq, NULL, ad7091r_event_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, name, iio_dev); @@ -243,7 +405,14 @@ int ad7091r_probe(struct device *dev, const char *name, if (IS_ERR(st->vref)) { if (PTR_ERR(st->vref) == -EPROBE_DEFER) return -EPROBE_DEFER; + st->vref = NULL; + /* Enable internal vref */ + ret = regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_INT_VREF); + if (ret) + return dev_err_probe(st->dev, ret, + "Error on enable internal reference\n"); } else { ret = regulator_enable(st->vref); if (ret) diff --git a/drivers/iio/adc/ad7091r-base.h b/drivers/iio/adc/ad7091r-base.h index 509748aef9b19..b9e1c8bf3440a 100644 --- a/drivers/iio/adc/ad7091r-base.h +++ b/drivers/iio/adc/ad7091r-base.h @@ -8,6 +8,12 @@ #ifndef __DRIVERS_IIO_ADC_AD7091R_BASE_H__ #define __DRIVERS_IIO_ADC_AD7091R_BASE_H__ +#define AD7091R_REG_CONF_INT_VREF BIT(0) + +/* AD7091R_REG_CH_LIMIT */ +#define AD7091R_HIGH_LIMIT 0xFFF +#define AD7091R_LOW_LIMIT 0x0 + struct device; struct ad7091r_state; @@ -17,6 +23,8 @@ struct ad7091r_chip_info { unsigned int vref_mV; }; +extern const struct iio_event_spec ad7091r_events[3]; + extern const struct regmap_config ad7091r_regmap_config; int ad7091r_probe(struct device *dev, const char *name, diff --git a/drivers/iio/adc/ad7091r5.c b/drivers/iio/adc/ad7091r5.c index 2f048527b7b78..dae98c95ebb87 100644 --- a/drivers/iio/adc/ad7091r5.c +++ b/drivers/iio/adc/ad7091r5.c @@ -12,26 +12,6 @@ #include "ad7091r-base.h" -static const struct iio_event_spec ad7091r5_events[] = { - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_EITHER, - .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), - }, -}; - #define AD7091R_CHANNEL(idx, bits, ev, num_ev) { \ .type = IIO_VOLTAGE, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -44,10 +24,10 @@ static const struct iio_event_spec ad7091r5_events[] = { .scan_type.realbits = bits, \ } static const struct iio_chan_spec ad7091r5_channels_irq[] = { - AD7091R_CHANNEL(0, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(1, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(2, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(3, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), + AD7091R_CHANNEL(0, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(1, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(2, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(3, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), }; static const struct iio_chan_spec ad7091r5_channels_noirq[] = { diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index 28f3fdfe23a29..6975a71d740f6 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -487,9 +487,15 @@ vb2_dma_sg_dmabuf_ops_end_cpu_access(struct dma_buf *dbuf, static int vb2_dma_sg_dmabuf_ops_vmap(struct dma_buf *dbuf, struct iosys_map *map) { - struct vb2_dma_sg_buf *buf = dbuf->priv; + struct vb2_dma_sg_buf *buf; + void *vaddr; + + buf = dbuf->priv; + vaddr = vb2_dma_sg_vaddr(buf->vb, buf); + if (!vaddr) + return -EINVAL; - iosys_map_set_vaddr(map, buf->vaddr); + iosys_map_set_vaddr(map, vaddr); return 0; } diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 29098612813cb..c6fea5837a19f 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -41,18 +41,18 @@ #define IMX290_WINMODE_720P (1 << 4) #define IMX290_WINMODE_CROP (4 << 4) #define IMX290_FR_FDG_SEL CCI_REG8(0x3009) -#define IMX290_BLKLEVEL CCI_REG16(0x300a) +#define IMX290_BLKLEVEL CCI_REG16_LE(0x300a) #define IMX290_GAIN CCI_REG8(0x3014) -#define IMX290_VMAX CCI_REG24(0x3018) +#define IMX290_VMAX CCI_REG24_LE(0x3018) #define IMX290_VMAX_MAX 0x3ffff -#define IMX290_HMAX CCI_REG16(0x301c) +#define IMX290_HMAX CCI_REG16_LE(0x301c) #define IMX290_HMAX_MAX 0xffff -#define IMX290_SHS1 CCI_REG24(0x3020) +#define IMX290_SHS1 CCI_REG24_LE(0x3020) #define IMX290_WINWV_OB CCI_REG8(0x303a) -#define IMX290_WINPV CCI_REG16(0x303c) -#define IMX290_WINWV CCI_REG16(0x303e) -#define IMX290_WINPH CCI_REG16(0x3040) -#define IMX290_WINWH CCI_REG16(0x3042) +#define IMX290_WINPV CCI_REG16_LE(0x303c) +#define IMX290_WINWV CCI_REG16_LE(0x303e) +#define IMX290_WINPH CCI_REG16_LE(0x3040) +#define IMX290_WINWH CCI_REG16_LE(0x3042) #define IMX290_OUT_CTRL CCI_REG8(0x3046) #define IMX290_ODBIT_10BIT (0 << 0) #define IMX290_ODBIT_12BIT (1 << 0) @@ -78,28 +78,28 @@ #define IMX290_ADBIT2 CCI_REG8(0x317c) #define IMX290_ADBIT2_10BIT 0x12 #define IMX290_ADBIT2_12BIT 0x00 -#define IMX290_CHIP_ID CCI_REG16(0x319a) +#define IMX290_CHIP_ID CCI_REG16_LE(0x319a) #define IMX290_ADBIT3 CCI_REG8(0x31ec) #define IMX290_ADBIT3_10BIT 0x37 #define IMX290_ADBIT3_12BIT 0x0e #define IMX290_REPETITION CCI_REG8(0x3405) #define IMX290_PHY_LANE_NUM CCI_REG8(0x3407) #define IMX290_OPB_SIZE_V CCI_REG8(0x3414) -#define IMX290_Y_OUT_SIZE CCI_REG16(0x3418) -#define IMX290_CSI_DT_FMT CCI_REG16(0x3441) +#define IMX290_Y_OUT_SIZE CCI_REG16_LE(0x3418) +#define IMX290_CSI_DT_FMT CCI_REG16_LE(0x3441) #define IMX290_CSI_DT_FMT_RAW10 0x0a0a #define IMX290_CSI_DT_FMT_RAW12 0x0c0c #define IMX290_CSI_LANE_MODE CCI_REG8(0x3443) -#define IMX290_EXTCK_FREQ CCI_REG16(0x3444) -#define IMX290_TCLKPOST CCI_REG16(0x3446) -#define IMX290_THSZERO CCI_REG16(0x3448) -#define IMX290_THSPREPARE CCI_REG16(0x344a) -#define IMX290_TCLKTRAIL CCI_REG16(0x344c) -#define IMX290_THSTRAIL CCI_REG16(0x344e) -#define IMX290_TCLKZERO CCI_REG16(0x3450) -#define IMX290_TCLKPREPARE CCI_REG16(0x3452) -#define IMX290_TLPX CCI_REG16(0x3454) -#define IMX290_X_OUT_SIZE CCI_REG16(0x3472) +#define IMX290_EXTCK_FREQ CCI_REG16_LE(0x3444) +#define IMX290_TCLKPOST CCI_REG16_LE(0x3446) +#define IMX290_THSZERO CCI_REG16_LE(0x3448) +#define IMX290_THSPREPARE CCI_REG16_LE(0x344a) +#define IMX290_TCLKTRAIL CCI_REG16_LE(0x344c) +#define IMX290_THSTRAIL CCI_REG16_LE(0x344e) +#define IMX290_TCLKZERO CCI_REG16_LE(0x3450) +#define IMX290_TCLKPREPARE CCI_REG16_LE(0x3452) +#define IMX290_TLPX CCI_REG16_LE(0x3454) +#define IMX290_X_OUT_SIZE CCI_REG16_LE(0x3472) #define IMX290_INCKSEL7 CCI_REG8(0x3480) #define IMX290_PGCTRL_REGEN BIT(0) diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c index 9c79ae8dc8428..059a41b7eefc4 100644 --- a/drivers/media/i2c/imx355.c +++ b/drivers/media/i2c/imx355.c @@ -1788,10 +1788,6 @@ static int imx355_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&imx355->sd); - if (ret < 0) - goto error_media_entity; - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -1800,9 +1796,15 @@ static int imx355_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&imx355->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&imx355->sd.entity); error_handler_free: diff --git a/drivers/media/i2c/ov01a10.c b/drivers/media/i2c/ov01a10.c index 2b9e1b3a3bf4f..9afe9bf50334a 100644 --- a/drivers/media/i2c/ov01a10.c +++ b/drivers/media/i2c/ov01a10.c @@ -907,6 +907,7 @@ static void ov01a10_remove(struct i2c_client *client) v4l2_ctrl_handler_free(sd->ctrl_handler); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); } static int ov01a10_probe(struct i2c_client *client) @@ -953,17 +954,26 @@ static int ov01a10_probe(struct i2c_client *client) goto err_media_entity_cleanup; } + /* + * Device is already turned on by i2c-core with ACPI domain PM. + * Enable runtime PM and turn off the device. + */ + pm_runtime_set_active(&client->dev); + pm_runtime_enable(dev); + pm_runtime_idle(dev); + ret = v4l2_async_register_subdev_sensor(&ov01a10->sd); if (ret < 0) { dev_err(dev, "Failed to register subdev: %d\n", ret); - goto err_media_entity_cleanup; + goto err_pm_disable; } - pm_runtime_enable(dev); - pm_runtime_idle(dev); - return 0; +err_pm_disable: + pm_runtime_disable(dev); + pm_runtime_set_suspended(&client->dev); + err_media_entity_cleanup: media_entity_cleanup(&ov01a10->sd.entity); diff --git a/drivers/media/i2c/ov13b10.c b/drivers/media/i2c/ov13b10.c index 8ebdb32dd3dbc..124eaa4f9e2ca 100644 --- a/drivers/media/i2c/ov13b10.c +++ b/drivers/media/i2c/ov13b10.c @@ -1536,24 +1536,27 @@ static int ov13b10_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov13b->sd); - if (ret < 0) - goto error_media_entity; /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. */ - /* Set the device's state to active if it's in D0 state. */ if (full_power) pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov13b->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + if (full_power) + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov13b->sd.entity); error_handler_free: @@ -1576,6 +1579,7 @@ static void ov13b10_remove(struct i2c_client *client) ov13b10_free_controls(ov13b); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); } static DEFINE_RUNTIME_DEV_PM_OPS(ov13b10_pm_ops, ov13b10_suspend, diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c index b6244772bc593..b36fc0fedad48 100644 --- a/drivers/media/i2c/ov9734.c +++ b/drivers/media/i2c/ov9734.c @@ -939,6 +939,7 @@ static void ov9734_remove(struct i2c_client *client) media_entity_cleanup(&sd->entity); v4l2_ctrl_handler_free(sd->ctrl_handler); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); mutex_destroy(&ov9734->mutex); } @@ -984,13 +985,6 @@ static int ov9734_probe(struct i2c_client *client) goto probe_error_v4l2_ctrl_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov9734->sd); - if (ret < 0) { - dev_err(&client->dev, "failed to register V4L2 subdev: %d", - ret); - goto probe_error_media_entity_cleanup; - } - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -999,9 +993,18 @@ static int ov9734_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov9734->sd); + if (ret < 0) { + dev_err(&client->dev, "failed to register V4L2 subdev: %d", + ret); + goto probe_error_media_entity_cleanup_pm; + } + return 0; -probe_error_media_entity_cleanup: +probe_error_media_entity_cleanup_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov9734->sd.entity); probe_error_v4l2_ctrl_handler_free: diff --git a/drivers/media/i2c/st-mipid02.c b/drivers/media/i2c/st-mipid02.c index fa27638edc072..dab14787116b6 100644 --- a/drivers/media/i2c/st-mipid02.c +++ b/drivers/media/i2c/st-mipid02.c @@ -770,6 +770,7 @@ static void mipid02_set_fmt_sink(struct v4l2_subdev *sd, struct v4l2_subdev_format *format) { struct mipid02_dev *bridge = to_mipid02_dev(sd); + struct v4l2_subdev_format source_fmt; struct v4l2_mbus_framefmt *fmt; format->format.code = get_fmt_code(format->format.code); @@ -781,8 +782,12 @@ static void mipid02_set_fmt_sink(struct v4l2_subdev *sd, *fmt = format->format; - /* Propagate the format change to the source pad */ - mipid02_set_fmt_source(sd, sd_state, format); + /* + * Propagate the format change to the source pad, taking + * care not to update the format pointer given back to user + */ + source_fmt = *format; + mipid02_set_fmt_source(sd, sd_state, &source_fmt); } static int mipid02_set_fmt(struct v4l2_subdev *sd, diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c index 60425c99a2b8b..c3456c700c07e 100644 --- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c @@ -1021,13 +1021,13 @@ static void mtk_jpeg_dec_device_run(void *priv) if (ret < 0) goto dec_end; - schedule_delayed_work(&jpeg->job_timeout_work, - msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); - mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs); if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb)) goto dec_end; + schedule_delayed_work(&jpeg->job_timeout_work, + msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); + spin_lock_irqsave(&jpeg->hw_lock, flags); mtk_jpeg_dec_reset(jpeg->reg_base); mtk_jpeg_dec_set_config(jpeg->reg_base, @@ -1749,9 +1749,6 @@ retry_select: v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); - schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work, - msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); - mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs); if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, @@ -1761,6 +1758,9 @@ retry_select: goto setdst_end; } + schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work, + msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); + spin_lock_irqsave(&comp_jpeg[hw_id]->hw_lock, flags); ctx->total_frame_num++; mtk_jpeg_dec_reset(comp_jpeg[hw_id]->reg_base); diff --git a/drivers/media/v4l2-core/v4l2-cci.c b/drivers/media/v4l2-core/v4l2-cci.c index bc2dbec019b04..10005c80f43b5 100644 --- a/drivers/media/v4l2-core/v4l2-cci.c +++ b/drivers/media/v4l2-core/v4l2-cci.c @@ -18,6 +18,7 @@ int cci_read(struct regmap *map, u32 reg, u64 *val, int *err) { + bool little_endian; unsigned int len; u8 buf[8]; int ret; @@ -25,8 +26,9 @@ int cci_read(struct regmap *map, u32 reg, u64 *val, int *err) if (err && *err) return *err; - len = FIELD_GET(CCI_REG_WIDTH_MASK, reg); - reg = FIELD_GET(CCI_REG_ADDR_MASK, reg); + little_endian = reg & CCI_REG_LE; + len = CCI_REG_WIDTH_BYTES(reg); + reg = CCI_REG_ADDR(reg); ret = regmap_bulk_read(map, reg, buf, len); if (ret) { @@ -40,16 +42,28 @@ int cci_read(struct regmap *map, u32 reg, u64 *val, int *err) *val = buf[0]; break; case 2: - *val = get_unaligned_be16(buf); + if (little_endian) + *val = get_unaligned_le16(buf); + else + *val = get_unaligned_be16(buf); break; case 3: - *val = get_unaligned_be24(buf); + if (little_endian) + *val = get_unaligned_le24(buf); + else + *val = get_unaligned_be24(buf); break; case 4: - *val = get_unaligned_be32(buf); + if (little_endian) + *val = get_unaligned_le32(buf); + else + *val = get_unaligned_be32(buf); break; case 8: - *val = get_unaligned_be64(buf); + if (little_endian) + *val = get_unaligned_le64(buf); + else + *val = get_unaligned_be64(buf); break; default: dev_err(regmap_get_device(map), "Error invalid reg-width %u for reg 0x%04x\n", @@ -68,6 +82,7 @@ EXPORT_SYMBOL_GPL(cci_read); int cci_write(struct regmap *map, u32 reg, u64 val, int *err) { + bool little_endian; unsigned int len; u8 buf[8]; int ret; @@ -75,24 +90,37 @@ int cci_write(struct regmap *map, u32 reg, u64 val, int *err) if (err && *err) return *err; - len = FIELD_GET(CCI_REG_WIDTH_MASK, reg); - reg = FIELD_GET(CCI_REG_ADDR_MASK, reg); + little_endian = reg & CCI_REG_LE; + len = CCI_REG_WIDTH_BYTES(reg); + reg = CCI_REG_ADDR(reg); switch (len) { case 1: buf[0] = val; break; case 2: - put_unaligned_be16(val, buf); + if (little_endian) + put_unaligned_le16(val, buf); + else + put_unaligned_be16(val, buf); break; case 3: - put_unaligned_be24(val, buf); + if (little_endian) + put_unaligned_le24(val, buf); + else + put_unaligned_be24(val, buf); break; case 4: - put_unaligned_be32(val, buf); + if (little_endian) + put_unaligned_le32(val, buf); + else + put_unaligned_be32(val, buf); break; case 8: - put_unaligned_be64(val, buf); + if (little_endian) + put_unaligned_le64(val, buf); + else + put_unaligned_be64(val, buf); break; default: dev_err(regmap_get_device(map), "Error invalid reg-width %u for reg 0x%04x\n", diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 134c36edb6cf7..32d49100dff51 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -400,6 +400,10 @@ struct mmc_blk_ioc_data { struct mmc_ioc_cmd ic; unsigned char *buf; u64 buf_bytes; + unsigned int flags; +#define MMC_BLK_IOC_DROP BIT(0) /* drop this mrq */ +#define MMC_BLK_IOC_SBC BIT(1) /* use mrq.sbc */ + struct mmc_rpmb_data *rpmb; }; @@ -465,7 +469,7 @@ static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr, } static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, - struct mmc_blk_ioc_data *idata) + struct mmc_blk_ioc_data **idatas, int i) { struct mmc_command cmd = {}, sbc = {}; struct mmc_data data = {}; @@ -475,10 +479,18 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, unsigned int busy_timeout_ms; int err; unsigned int target_part; + struct mmc_blk_ioc_data *idata = idatas[i]; + struct mmc_blk_ioc_data *prev_idata = NULL; if (!card || !md || !idata) return -EINVAL; + if (idata->flags & MMC_BLK_IOC_DROP) + return 0; + + if (idata->flags & MMC_BLK_IOC_SBC) + prev_idata = idatas[i - 1]; + /* * The RPMB accesses comes in from the character device, so we * need to target these explicitly. Else we just target the @@ -532,7 +544,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return err; } - if (idata->rpmb) { + if (idata->rpmb || prev_idata) { sbc.opcode = MMC_SET_BLOCK_COUNT; /* * We don't do any blockcount validation because the max size @@ -540,6 +552,8 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, * 'Reliable Write' bit here. */ sbc.arg = data.blocks | (idata->ic.write_flag & BIT(31)); + if (prev_idata) + sbc.arg = prev_idata->ic.arg; sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; mrq.sbc = &sbc; } @@ -557,6 +571,15 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); + if (prev_idata) { + memcpy(&prev_idata->ic.response, sbc.resp, sizeof(sbc.resp)); + if (sbc.error) { + dev_err(mmc_dev(card->host), "%s: sbc error %d\n", + __func__, sbc.error); + return sbc.error; + } + } + if (cmd.error) { dev_err(mmc_dev(card->host), "%s: cmd error %d\n", __func__, cmd.error); @@ -1034,6 +1057,20 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } +static void mmc_blk_check_sbc(struct mmc_queue_req *mq_rq) +{ + struct mmc_blk_ioc_data **idata = mq_rq->drv_op_data; + int i; + + for (i = 1; i < mq_rq->ioc_count; i++) { + if (idata[i - 1]->ic.opcode == MMC_SET_BLOCK_COUNT && + mmc_op_multi(idata[i]->ic.opcode)) { + idata[i - 1]->flags |= MMC_BLK_IOC_DROP; + idata[i]->flags |= MMC_BLK_IOC_SBC; + } + } +} + /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this @@ -1061,11 +1098,14 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) if (ret) break; } + + mmc_blk_check_sbc(mq_rq); + fallthrough; case MMC_DRV_OP_IOCTL_RPMB: idata = mq_rq->drv_op_data; for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) { - ret = __mmc_blk_ioctl_cmd(card, md, idata[i]); + ret = __mmc_blk_ioctl_cmd(card, md, idata, i); if (ret) break; } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index cc333ad67cac8..2a99ffb61f8c0 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -119,19 +119,14 @@ struct mmc_spi_host { struct spi_transfer status; struct spi_message readback; - /* underlying DMA-aware controller, or null */ - struct device *dma_dev; - /* buffer used for commands and for message "overhead" */ struct scratch *data; - dma_addr_t data_dma; /* Specs say to write ones most of the time, even when the card * has no need to read its input data; and many cards won't care. * This is our source of those ones. */ void *ones; - dma_addr_t ones_dma; }; @@ -147,11 +142,8 @@ static inline int mmc_cs_off(struct mmc_spi_host *host) return spi_setup(host->spi); } -static int -mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) +static int mmc_spi_readbytes(struct mmc_spi_host *host, unsigned int len) { - int status; - if (len > sizeof(*host->data)) { WARN_ON(1); return -EIO; @@ -159,19 +151,7 @@ mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) host->status.len = len; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - status = spi_sync_locked(host->spi, &host->readback); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - return status; + return spi_sync_locked(host->spi, &host->readback); } static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout, @@ -506,23 +486,11 @@ mmc_spi_command_send(struct mmc_spi_host *host, t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = t->rx_buf = data->status; - t->tx_dma = t->rx_dma = host->data_dma; t->len = cp - data->status; t->cs_change = 1; spi_message_add_tail(t, &host->m); - if (host->dma_dev) { - host->m.is_dma_mapped = 1; - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - } status = spi_sync_locked(host->spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); if (status < 0) { dev_dbg(&host->spi->dev, " ... write returned %d\n", status); cmd->error = status; @@ -540,9 +508,6 @@ mmc_spi_command_send(struct mmc_spi_host *host, * We always provide TX data for data and CRC. The MMC/SD protocol * requires us to write ones; but Linux defaults to writing zeroes; * so we explicitly initialize it to all ones on RX paths. - * - * We also handle DMA mapping, so the underlying SPI controller does - * not need to (re)do it for each message. */ static void mmc_spi_setup_data_message( @@ -552,11 +517,8 @@ mmc_spi_setup_data_message( { struct spi_transfer *t; struct scratch *scratch = host->data; - dma_addr_t dma = host->data_dma; spi_message_init(&host->m); - if (dma) - host->m.is_dma_mapped = 1; /* for reads, readblock() skips 0xff bytes before finding * the token; for writes, this transfer issues that token. @@ -570,8 +532,6 @@ mmc_spi_setup_data_message( else scratch->data_token = SPI_TOKEN_SINGLE; t->tx_buf = &scratch->data_token; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, data_token); spi_message_add_tail(t, &host->m); } @@ -581,7 +541,6 @@ mmc_spi_setup_data_message( t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; /* length and actual buffer info are written later */ spi_message_add_tail(t, &host->m); @@ -591,14 +550,9 @@ mmc_spi_setup_data_message( if (direction == DMA_TO_DEVICE) { /* the actual CRC may get written later */ t->tx_buf = &scratch->crc_val; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, crc_val); } else { t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = &scratch->crc_val; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, crc_val); } spi_message_add_tail(t, &host->m); @@ -621,10 +575,7 @@ mmc_spi_setup_data_message( memset(t, 0, sizeof(*t)); t->len = (direction == DMA_TO_DEVICE) ? sizeof(scratch->status) : 1; t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = scratch->status; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, status); t->cs_change = 1; spi_message_add_tail(t, &host->m); } @@ -653,23 +604,13 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, if (host->mmc->use_spi_crc) scratch->crc_val = cpu_to_be16(crc_itu_t(0, t->tx_buf, t->len)); - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); status = spi_sync_locked(spi, &host->m); - if (status != 0) { dev_dbg(&spi->dev, "write error (%d)\n", status); return status; } - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - /* * Get the transmission data-response reply. It must follow * immediately after the data block we transferred. This reply @@ -718,8 +659,6 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->tx_buf += t->len; - if (host->dma_dev) - t->tx_dma += t->len; /* Return when not busy. If we didn't collect that status yet, * we'll need some more I/O. @@ -783,30 +722,12 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } leftover = status << 1; - if (host->dma_dev) { - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_device(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - status = spi_sync_locked(spi, &host->m); if (status < 0) { dev_dbg(&spi->dev, "read error %d\n", status); return status; } - if (host->dma_dev) { - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_cpu(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - if (bitshift) { /* Walk through the data and the crc and do * all the magic to get byte-aligned data. @@ -841,8 +762,6 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->rx_buf += t->len; - if (host->dma_dev) - t->rx_dma += t->len; return 0; } @@ -857,7 +776,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, struct mmc_data *data, u32 blk_size) { struct spi_device *spi = host->spi; - struct device *dma_dev = host->dma_dev; struct spi_transfer *t; enum dma_data_direction direction = mmc_get_dma_dir(data); struct scatterlist *sg; @@ -884,31 +802,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, */ for_each_sg(data->sg, sg, data->sg_len, n_sg) { int status = 0; - dma_addr_t dma_addr = 0; void *kmap_addr; unsigned length = sg->length; - enum dma_data_direction dir = direction; - - /* set up dma mapping for controller drivers that might - * use DMA ... though they may fall back to PIO - */ - if (dma_dev) { - /* never invalidate whole *shared* pages ... */ - if ((sg->offset != 0 || length != PAGE_SIZE) - && dir == DMA_FROM_DEVICE) - dir = DMA_BIDIRECTIONAL; - - dma_addr = dma_map_page(dma_dev, sg_page(sg), 0, - PAGE_SIZE, dir); - if (dma_mapping_error(dma_dev, dma_addr)) { - data->error = -EFAULT; - break; - } - if (direction == DMA_TO_DEVICE) - t->tx_dma = dma_addr + sg->offset; - else - t->rx_dma = dma_addr + sg->offset; - } /* allow pio too; we don't allow highmem */ kmap_addr = kmap(sg_page(sg)); @@ -941,8 +836,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, if (direction == DMA_FROM_DEVICE) flush_dcache_page(sg_page(sg)); kunmap(sg_page(sg)); - if (dma_dev) - dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); if (status < 0) { data->error = status; @@ -977,21 +870,9 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, scratch->status[0] = SPI_TOKEN_STOP_TRAN; host->early_status.tx_buf = host->early_status.rx_buf; - host->early_status.tx_dma = host->early_status.rx_dma; host->early_status.len = statlen; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - tmp = spi_sync_locked(spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - if (tmp < 0) { if (!data->error) data->error = tmp; @@ -1265,52 +1146,6 @@ mmc_spi_detect_irq(int irq, void *mmc) return IRQ_HANDLED; } -#ifdef CONFIG_HAS_DMA -static int mmc_spi_dma_alloc(struct mmc_spi_host *host) -{ - struct spi_device *spi = host->spi; - struct device *dev; - - if (!spi->master->dev.parent->dma_mask) - return 0; - - dev = spi->master->dev.parent; - - host->ones_dma = dma_map_single(dev, host->ones, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, host->ones_dma)) - return -ENOMEM; - - host->data_dma = dma_map_single(dev, host->data, sizeof(*host->data), - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, host->data_dma)) { - dma_unmap_single(dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - return -ENOMEM; - } - - dma_sync_single_for_cpu(dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - - host->dma_dev = dev; - return 0; -} - -static void mmc_spi_dma_free(struct mmc_spi_host *host) -{ - if (!host->dma_dev) - return; - - dma_unmap_single(host->dma_dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - dma_unmap_single(host->dma_dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); -} -#else -static inline int mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } -static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {} -#endif - static int mmc_spi_probe(struct spi_device *spi) { void *ones; @@ -1402,24 +1237,17 @@ static int mmc_spi_probe(struct spi_device *spi) host->powerup_msecs = 250; } - /* preallocate dma buffers */ + /* Preallocate buffers */ host->data = kmalloc(sizeof(*host->data), GFP_KERNEL); if (!host->data) goto fail_nobuf1; - status = mmc_spi_dma_alloc(host); - if (status) - goto fail_dma; - /* setup message for status/busy readback */ spi_message_init(&host->readback); - host->readback.is_dma_mapped = (host->dma_dev != NULL); spi_message_add_tail(&host->status, &host->readback); host->status.tx_buf = host->ones; - host->status.tx_dma = host->ones_dma; host->status.rx_buf = &host->data->status; - host->status.rx_dma = host->data_dma + offsetof(struct scratch, status); host->status.cs_change = 1; /* register card detect irq */ @@ -1464,9 +1292,8 @@ static int mmc_spi_probe(struct spi_device *spi) if (!status) has_ro = true; - dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", + dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n", dev_name(&mmc->class_dev), - host->dma_dev ? "" : ", no DMA", has_ro ? "" : ", no WP", (host->pdata && host->pdata->setpower) ? "" : ", no poweroff", @@ -1477,8 +1304,6 @@ static int mmc_spi_probe(struct spi_device *spi) fail_gpiod_request: mmc_remove_host(mmc); fail_glue_init: - mmc_spi_dma_free(host); -fail_dma: kfree(host->data); fail_nobuf1: mmc_spi_put_pdata(spi); @@ -1500,7 +1325,6 @@ static void mmc_spi_remove(struct spi_device *spi) mmc_remove_host(mmc); - mmc_spi_dma_free(host); kfree(host->data); kfree(host->ones); diff --git a/drivers/mtd/maps/vmu-flash.c b/drivers/mtd/maps/vmu-flash.c index a7ec947a3ebb1..53019d313db71 100644 --- a/drivers/mtd/maps/vmu-flash.c +++ b/drivers/mtd/maps/vmu-flash.c @@ -719,7 +719,7 @@ static int vmu_can_unload(struct maple_device *mdev) card = maple_get_drvdata(mdev); for (x = 0; x < card->partitions; x++) { mtd = &((card->mtd)[x]); - if (mtd->usecount > 0) + if (kref_read(&mtd->refcnt)) return 0; } return 1; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 1fcac403cee60..76167b8ca9dda 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -1208,6 +1208,23 @@ static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page, return nand_exec_op(chip, &op); } +static void rawnand_cap_cont_reads(struct nand_chip *chip) +{ + struct nand_memory_organization *memorg; + unsigned int pages_per_lun, first_lun, last_lun; + + memorg = nanddev_get_memorg(&chip->base); + pages_per_lun = memorg->pages_per_eraseblock * memorg->eraseblocks_per_lun; + first_lun = chip->cont_read.first_page / pages_per_lun; + last_lun = chip->cont_read.last_page / pages_per_lun; + + /* Prevent sequential cache reads across LUN boundaries */ + if (first_lun != last_lun) + chip->cont_read.pause_page = first_lun * pages_per_lun + pages_per_lun - 1; + else + chip->cont_read.pause_page = chip->cont_read.last_page; +} + static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page, unsigned int offset_in_page, void *buf, unsigned int len, bool check_only) @@ -1226,7 +1243,7 @@ static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int p NAND_OP_DATA_IN(len, buf, 0), }; struct nand_op_instr cont_instrs[] = { - NAND_OP_CMD(page == chip->cont_read.last_page ? + NAND_OP_CMD(page == chip->cont_read.pause_page ? NAND_CMD_READCACHEEND : NAND_CMD_READCACHESEQ, NAND_COMMON_TIMING_NS(conf, tWB_max)), NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tR_max), @@ -1263,16 +1280,29 @@ static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int p } if (page == chip->cont_read.first_page) - return nand_exec_op(chip, &start_op); + ret = nand_exec_op(chip, &start_op); else - return nand_exec_op(chip, &cont_op); + ret = nand_exec_op(chip, &cont_op); + if (ret) + return ret; + + if (!chip->cont_read.ongoing) + return 0; + + if (page == chip->cont_read.pause_page && + page != chip->cont_read.last_page) { + chip->cont_read.first_page = chip->cont_read.pause_page + 1; + rawnand_cap_cont_reads(chip); + } else if (page == chip->cont_read.last_page) { + chip->cont_read.ongoing = false; + } + + return 0; } static bool rawnand_cont_read_ongoing(struct nand_chip *chip, unsigned int page) { - return chip->cont_read.ongoing && - page >= chip->cont_read.first_page && - page <= chip->cont_read.last_page; + return chip->cont_read.ongoing && page >= chip->cont_read.first_page; } /** @@ -3431,21 +3461,42 @@ static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page, u32 readlen, int col) { struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int end_page, end_col; + + chip->cont_read.ongoing = false; if (!chip->controller->supported_op.cont_read) return; - if ((col && col + readlen < (3 * mtd->writesize)) || - (!col && readlen < (2 * mtd->writesize))) { - chip->cont_read.ongoing = false; + end_page = DIV_ROUND_UP(col + readlen, mtd->writesize); + end_col = (col + readlen) % mtd->writesize; + + if (col) + page++; + + if (end_col && end_page) + end_page--; + + if (page + 1 > end_page) return; - } - chip->cont_read.ongoing = true; chip->cont_read.first_page = page; - if (col) + chip->cont_read.last_page = end_page; + chip->cont_read.ongoing = true; + + rawnand_cap_cont_reads(chip); +} + +static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned int page) +{ + if (!chip->cont_read.ongoing || page != chip->cont_read.first_page) + return; + + chip->cont_read.first_page++; + if (chip->cont_read.first_page == chip->cont_read.pause_page) chip->cont_read.first_page++; - chip->cont_read.last_page = page + ((readlen >> chip->page_shift) & chip->pagemask); + if (chip->cont_read.first_page >= chip->cont_read.last_page) + chip->cont_read.ongoing = false; } /** @@ -3622,6 +3673,8 @@ read_retry: buf += bytes; max_bitflips = max_t(unsigned int, max_bitflips, chip->pagecache.bitflips); + + rawnand_cont_read_skip_first_page(chip, page); } readlen -= bytes; @@ -5126,6 +5179,14 @@ static void rawnand_late_check_supported_ops(struct nand_chip *chip) /* The supported_op fields should not be set by individual drivers */ WARN_ON_ONCE(chip->controller->supported_op.cont_read); + /* + * Too many devices do not support sequential cached reads with on-die + * ECC correction enabled, so in this case refuse to perform the + * automation. + */ + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_DIE) + return; + if (!nand_has_exec_op(chip)) return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6039886a8544f..dac4f9510c173 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10598,10 +10598,12 @@ int bnxt_half_open_nic(struct bnxt *bp) netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); goto half_open_err; } + bnxt_init_napi(bp); set_bit(BNXT_STATE_HALF_OPEN, &bp->state); rc = bnxt_init_nic(bp, true); if (rc) { clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); + bnxt_del_napi(bp); netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; } @@ -10620,6 +10622,7 @@ half_open_err: void bnxt_half_close_nic(struct bnxt *bp) { bnxt_hwrm_resource_free(bp, false, true); + bnxt_del_napi(bp); bnxt_free_skbs(bp); bnxt_free_mem(bp, true); clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); @@ -12261,6 +12264,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) bp->fw_cap = 0; rc = bnxt_hwrm_ver_get(bp); + /* FW may be unresponsive after FLR. FLR must complete within 100 msec + * so wait before continuing with recovery. + */ + if (rc) + msleep(100); bnxt_try_map_fw_health_reg(bp); if (rc) { rc = bnxt_try_recover_fw(bp); diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 38da2d6c250e6..08e113e785a76 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1434,7 +1434,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, xdp_prepare_buff(&xdp, page_address(entry->page), XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE, - length, false); + length - ETH_FCS_LEN, false); consume = tsnep_xdp_run_prog(rx, prog, &xdp, &xdp_status, tx_nq, tx); @@ -1517,7 +1517,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi, prefetch(entry->xdp->data); length = __le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_LENGTH_MASK; - xsk_buff_set_size(entry->xdp, length); + xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN); xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool); /* RX metadata with timestamps is in front of actual data, @@ -1711,6 +1711,19 @@ static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx) allocated--; } } + + /* set need wakeup flag immediately if ring is not filled completely, + * first polling would be too late as need wakeup signalisation would + * be delayed for an indefinite time + */ + if (xsk_uses_need_wakeup(rx->xsk_pool)) { + int desc_available = tsnep_rx_desc_available(rx); + + if (desc_available) + xsk_set_rx_need_wakeup(rx->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx->xsk_pool); + } } static bool tsnep_pending(struct tsnep_queue *queue) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 35c95f07fd6d7..54da59286df4e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2011,6 +2011,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) /* if any of the above changed restart the FEC */ if (status_change) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); @@ -2020,6 +2021,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) } } else { if (fep->link) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_stop(ndev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5b20eba93d048..aad39ebff4aba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3578,40 +3578,55 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) struct i40e_hmc_obj_rxq rx_ctx; int err = 0; bool ok; - int ret; bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); - if (ring->vsi->type == I40E_VSI_MAIN) - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + ring->rx_buf_len = vsi->rx_buf_len; + + /* XDP RX-queue info only needed for RX rings exposed to XDP */ + if (ring->vsi->type != I40E_VSI_MAIN) + goto skip; + + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - ring->rx_buf_len = - xsk_pool_get_rx_frame_size(ring->xsk_pool); - ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + xdp_rxq_info_unreg(&ring->xdp_rxq); + ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); - if (ret) - return ret; + if (err) + return err; dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->queue_index); } else { - ring->rx_buf_len = vsi->rx_buf_len; - if (ring->vsi->type == I40E_VSI_MAIN) { - ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (ret) - return ret; - } + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; } +skip: xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b047c587629b6..1df2f93388128 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1556,7 +1556,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring) int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) { struct device *dev = rx_ring->dev; - int err; u64_stats_init(&rx_ring->syncp); @@ -1577,14 +1576,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_process = 0; rx_ring->next_to_use = 0; - /* XDP RX-queue info only needed for RX rings exposed to XDP */ - if (rx_ring->vsi->type == I40E_VSI_MAIN) { - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id); - if (err < 0) - return err; - } - rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; rx_ring->rx_bi = @@ -2100,7 +2091,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, struct xdp_buff *xdp) { - u32 next = rx_ring->next_to_clean; + u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; + u32 next = rx_ring->next_to_clean, i = 0; struct i40e_rx_buffer *rx_buffer; xdp->flags = 0; @@ -2113,10 +2105,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, if (!rx_buffer->page) continue; - if (xdp_res == I40E_XDP_CONSUMED) - rx_buffer->pagecnt_bias++; - else + if (xdp_res != I40E_XDP_CONSUMED) i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); + else if (i++ <= nr_frags) + rx_buffer->pagecnt_bias++; /* EOP buffer will be put in i40e_clean_rx_irq() */ if (next == rx_ring->next_to_process) @@ -2130,20 +2122,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, * i40e_construct_skb - Allocate skb and populate it * @rx_ring: rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data - * @nr_frags: number of buffers for the packet * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the * skb correctly. */ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, - struct xdp_buff *xdp, - u32 nr_frags) + struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; struct i40e_rx_buffer *rx_buffer; + struct skb_shared_info *sinfo; unsigned int headlen; struct sk_buff *skb; + u32 nr_frags = 0; /* prefetch first cache line of first page */ net_prefetch(xdp->data); @@ -2181,6 +2173,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); /* update all of the pointers */ size -= headlen; @@ -2200,9 +2196,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, } if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb); + struct skb_shared_info *skinfo = skb_shinfo(skb); - sinfo = xdp_get_shared_info_from_buff(xdp); memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], sizeof(skb_frag_t) * nr_frags); @@ -2225,17 +2220,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, * i40e_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data - * @nr_frags: number of buffers for the packet * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, - struct xdp_buff *xdp, - u32 nr_frags) + struct xdp_buff *xdp) { unsigned int metasize = xdp->data - xdp->data_meta; + struct skb_shared_info *sinfo; struct sk_buff *skb; + u32 nr_frags; /* Prefetch first cache line of first page. If xdp->data_meta * is unused, this points exactly as xdp->data, otherwise we @@ -2244,6 +2239,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, */ net_prefetch(xdp->data_meta); + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } + /* build an skb around the page buffer */ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) @@ -2256,9 +2256,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); xdp_update_skb_shared_info(skb, nr_frags, sinfo->xdp_frags_size, nr_frags * xdp->frame_sz, @@ -2603,9 +2600,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, total_rx_bytes += size; } else { if (ring_uses_build_skb(rx_ring)) - skb = i40e_build_skb(rx_ring, xdp, nfrags); + skb = i40e_build_skb(rx_ring, xdp); else - skb = i40e_construct_skb(rx_ring, xdp, nfrags); + skb = i40e_construct_skb(rx_ring, xdp); /* drop if we failed to retrieve a buffer */ if (!skb) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 7d991e4d9b896..1f8ae6f5d9807 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -418,7 +418,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first, } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), 0, size); + virt_to_page(xdp->data_hard_start), + XDP_PACKET_HEADROOM, size); sinfo->xdp_frags_size += size; xsk_buff_add_frag(xdp); @@ -503,7 +504,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog); i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets, &rx_bytes, xdp_res, &failure); - first->flags = 0; next_to_clean = next_to_process; if (failure) break; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 7fa43827a3f06..4f3e65b47cdc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -534,19 +534,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) ring->rx_buf_len = ring->vsi->rx_buf_len; if (ring->vsi->type == ICE_VSI_PF) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->vsi->rx_buf_len); + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + xdp_rxq_info_unreg(&ring->xdp_rxq); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@ -557,13 +565,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - __xdp_rxq_info_reg(&ring->xdp_rxq, - ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->vsi->rx_buf_len); + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 52d0a126eb616..24c914015973e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -513,11 +513,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); - if (rx_ring->vsi->type == ICE_VSI_PF && - !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->q_index, rx_ring->q_vector->napi.napi_id)) - goto err; return 0; err: @@ -600,9 +595,7 @@ out_failure: ret = ICE_XDP_CONSUMED; } exit: - rx_buf->act = ret; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, ret); + ice_set_rx_bufs_act(xdp, rx_ring, ret); } /** @@ -890,14 +883,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, } if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); return -ENOMEM; } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); sinfo->xdp_frags_size += size; + /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail() + * can pop off frags but driver has to handle it on its own + */ + rx_ring->nr_frags = sinfo->nr_frags; if (page_is_pfmemalloc(rx_buf->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -1249,6 +1245,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1264,10 +1261,12 @@ construct_skb: ICE_XDP_CONSUMED); xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; break; } xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 166413fc33f48..407d4c320097f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -333,6 +333,7 @@ struct ice_rx_ring { struct ice_channel *ch; struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; + u32 nr_frags; dma_addr_t dma; /* physical address of ring */ u64 cached_phctime; u16 rx_buf_len; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 115969ecdf7b9..b0e56675f98b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -12,26 +12,39 @@ * act: action to store onto Rx buffers related to XDP buffer parts * * Set action that should be taken before putting Rx buffer from first frag - * to one before last. Last one is handled by caller of this function as it - * is the EOP frag that is currently being processed. This function is - * supposed to be called only when XDP buffer contains frags. + * to the last. */ static inline void ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, const unsigned int act) { - const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - u32 first = rx_ring->first_desc; - u32 nr_frags = sinfo->nr_frags; + u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; struct ice_rx_buf *buf; for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[first]; + buf = &rx_ring->rx_buf[idx]; buf->act = act; - if (++first == cnt) - first = 0; + if (++idx == cnt) + idx = 0; + } + + /* adjust pagecnt_bias on frags freed by XDP prog */ + if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { + u32 delta = rx_ring->nr_frags - sinfo_frags; + + while (delta) { + if (idx == 0) + idx = cnt - 1; + else + idx--; + buf = &rx_ring->rx_buf[idx]; + buf->pagecnt_bias--; + delta--; + } } } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2a3f0834e1391..307c609137bdf 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -826,7 +826,8 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), 0, size); + virt_to_page(xdp->data_hard_start), + XDP_PACKET_HEADROOM, size); sinfo->xdp_frags_size += size; xsk_buff_add_frag(xdp); @@ -897,7 +898,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (!first) { first = xdp; - xdp_buff_clear_frags_flag(first); } else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) { break; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 21c3f9b015c85..aca17082b9eca 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv) mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val); } +/* Cleanup pool before actual initialization in the OS */ +static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id) +{ + unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu()); + u32 val; + int i; + + /* Drain the BM from all possible residues left by firmware */ + for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++) + mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id)); + + put_cpu(); + + /* Stop the BM pool */ + val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id)); + val |= MVPP2_BM_STOP_MASK; + mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val); +} + static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { enum dma_data_direction dma_dir = DMA_FROM_DEVICE; int i, err, poolnum = MVPP2_BM_POOLS_NUM; struct mvpp2_port *port; + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + /* Clean up the pool state in case it contains stale state */ + for (i = 0; i < poolnum; i++) + mvpp2_bm_pool_cleanup(priv, i); + if (priv->percpu_pools) { for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; @@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) } } - poolnum = mvpp2_get_nrxqs(priv) * 2; for (i = 0; i < poolnum; i++) { /* the pool in use */ int pn = i / (poolnum / 2); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 7013e1c8741a3..55efb932ab2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1921,6 +1921,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, { const char *namep = mlx5_command_str(opcode); struct mlx5_cmd_stats *stats; + unsigned long flags; if (!err || !(strcmp(namep, "unknown command opcode"))) return; @@ -1928,7 +1929,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, stats = xa_load(&dev->cmd.stats, opcode); if (!stats) return; - spin_lock_irq(&stats->lock); + spin_lock_irqsave(&stats->lock, flags); stats->failed++; if (err < 0) stats->last_failed_errno = -err; @@ -1937,7 +1938,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, stats->last_failed_mbox_status = status; stats->last_failed_syndrome = syndrome; } - spin_unlock_irq(&stats->lock); + spin_unlock_irqrestore(&stats->lock, flags); } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index e1283531e0b81..671adbad0a40f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft) in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index e097f336e1c4a..30507b7c2fb17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1062,8 +1062,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = - mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index af3928eddafd1..803035d4e5976 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -213,7 +213,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); - md_buff[*md_buff_sz++] = metadata_id; + md_buff[(*md_buff_sz)++] = metadata_id; if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 5834e47e72d82..e2ffc572de188 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -336,12 +336,17 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, /* iv len */ aes_gcm->icv_len = x->aead->alg_icv_len; + attrs->dir = x->xso.dir; + /* esn */ if (x->props.flags & XFRM_STATE_ESN) { attrs->replay_esn.trigger = true; attrs->replay_esn.esn = sa_entry->esn_state.esn; attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + goto skip_replay_window; + switch (x->replay_esn->replay_window) { case 32: attrs->replay_esn.replay_window = @@ -365,7 +370,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, } } - attrs->dir = x->xso.dir; +skip_replay_window: /* spi */ attrs->spi = be32_to_cpu(x->id.spi); @@ -501,7 +506,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->replay_esn && x->replay_esn->replay_window != 32 && + if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN && + x->replay_esn->replay_window != 32 && x->replay_esn->replay_window != 64 && x->replay_esn->replay_window != 128 && x->replay_esn->replay_window != 256) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index bb7f86c993e55..e66f486faafe1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -254,11 +254,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in || !ft->g) { - kfree(ft->g); - kvfree(in); + if (!ft->g) return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; } mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); @@ -278,7 +280,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } switch (type) { @@ -300,7 +302,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -309,7 +311,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; memset(in, 0, inlen); @@ -318,18 +320,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; kvfree(in); return 0; -err: +err_clean_group: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; -out: +err_free_in: kvfree(in); - +err_free_g: + kfree(ft->g); + ft->g = NULL; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 25e44ee5121a9..dc9b157a44993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2012,9 +2012,10 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) continue; + + list_del(&peer_flow->peer_flows); if (refcount_dec_and_test(&peer_flow->refcnt)) { mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); - list_del(&peer_flow->peer_flows); kfree(peer_flow); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index 7a01714b37800..22dd30cf8033f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -78,9 +78,12 @@ mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_md xa_for_each(&entry->ports, idx, port) { dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dests[i].ft = port->mcast.ft; + if (port->vport_num == MLX5_VPORT_UPLINK) + dests[i].ft->flags |= MLX5_FLOW_TABLE_UPLINK_VPORT; i++; } + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16); ether_addr_copy(dmac_v, entry->key.addr); @@ -585,10 +588,7 @@ mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_po if (!rule_spec) return ERR_PTR(-ENOMEM); - if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && - port->vport_num == MLX5_VPORT_UPLINK) - rule_spec->flow_context.flow_source = - MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -660,15 +660,11 @@ mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port) if (!rule_spec) return ERR_PTR(-ENOMEM); - if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && - port->vport_num == MLX5_VPORT_UPLINK) - rule_spec->flow_context.flow_source = - MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; - if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; dest.vport.vhca_id = port->esw_owner_vhca_id; } + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); kvfree(rule_spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a4b9253316618..b29299c49ab3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -566,6 +566,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, fte->flow_context.flow_tag); MLX5_SET(flow_context, in_flow_context, flow_source, fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en, + !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN)); MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 40c7be1240416..58bd749b5e4de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 5b83da08692d6..90c38cbbde181 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -781,6 +781,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, switch (action_type) { case DR_ACTION_TYP_DROP: attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; break; case DR_ACTION_TYP_FT: dest_action = action; @@ -866,11 +867,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->sampler->tx_icm_addr; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport->caps->vhca_gvmi; - dest_action = action; - attr.final_icm_addr = rx_rule ? - action->vport->caps->icm_address_rx : - action->vport->caps->icm_address_tx; + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1bfcf673b3ce7..292857c0e601f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4356,6 +4356,28 @@ dma_map_err: return NETDEV_TX_OK; } +/** + * stmmac_has_ip_ethertype() - Check if packet has IP ethertype + * @skb: socket buffer to check + * + * Check if a packet has an ethertype that will trigger the IP header checks + * and IP/TCP checksum engine of the stmmac core. + * + * Return: true if the ethertype can trigger the checksum engine, false + * otherwise + */ +static bool stmmac_has_ip_ethertype(struct sk_buff *skb) +{ + int depth = 0; + __be16 proto; + + proto = __vlan_get_protocol(skb, eth_header_parse_protocol(skb), + &depth); + + return (depth <= ETH_HLEN) && + (proto == htons(ETH_P_IP) || proto == htons(ETH_P_IPV6)); +} + /** * stmmac_xmit - Tx entry point of the driver * @skb : the socket buffer @@ -4417,6 +4439,20 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) WARN_ON(tx_q->tx_skbuff[first_entry]); csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); + /* DWMAC IPs can be synthesized to support tx coe only for a few tx + * queues. In that case, checksum offloading for those queues that don't + * support tx coe needs to fallback to software checksum calculation. + * + * Packets that won't trigger the COE e.g. most DSA-tagged packets will + * also have to be checksummed in software. + */ + if (csum_insertion && + (priv->plat->tx_queues_cfg[queue].coe_unsupported || + !stmmac_has_ip_ethertype(skb))) { + if (unlikely(skb_checksum_help(skb))) + goto dma_map_err; + csum_insertion = !csum_insertion; + } if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); @@ -4972,7 +5008,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -5488,7 +5524,7 @@ drain_data: stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -7405,6 +7441,9 @@ int stmmac_dvr_probe(struct device *device, dev_err(priv->device, "unable to bring out of ahb reset: %pe\n", ERR_PTR(ret)); + /* Wait a bit for the reset to take effect */ + udelay(10); + /* Init MAC and get the capabilities */ ret = stmmac_hw_init(priv); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 2f0678f15fb7e..30d5e635190e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -276,6 +276,9 @@ static int stmmac_mtl_setup(struct platform_device *pdev, plat->tx_queues_cfg[queue].use_prio = true; } + plat->tx_queues_cfg[queue].coe_unsupported = + of_property_read_bool(q_node, "snps,coe-unsupported"); + queue++; } if (queue != plat->tx_queues_to_use) { diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 704e949484d0c..b9b5554ea8620 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid); hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.req_buf)) - return -ENOMEM; + if (!(hw->hw_info.req_buf)) { + result = -ENOMEM; + goto free_ep_info; + } hw->hw_info.req_buf_size = mem_size; mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid); hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.res_buf)) - return -ENOMEM; + if (!(hw->hw_info.res_buf)) { + result = -ENOMEM; + goto free_req_buf; + } hw->hw_info.res_buf_size = mem_size; result = fjes_hw_alloc_shared_status_region(hw); if (result) - return result; + goto free_res_buf; hw->hw_info.buffer_share_bit = 0; hw->hw_info.buffer_unshare_reserve_bit = 0; @@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw) result = fjes_hw_alloc_epbuf(&buf_pair->tx); if (result) - return result; + goto free_epbuf; result = fjes_hw_alloc_epbuf(&buf_pair->rx); if (result) - return result; + goto free_epbuf; spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, mac, @@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) fjes_hw_init_command_registers(hw, ¶m); return 0; + +free_epbuf: + for (epidx = 0; epidx < hw->max_epid ; epidx++) { + if (epidx == hw->my_epid) + continue; + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx); + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx); + } + fjes_hw_free_shared_status_region(hw); +free_res_buf: + kfree(hw->hw_info.res_buf); + hw->hw_info.res_buf = NULL; +free_req_buf: + kfree(hw->hw_info.req_buf); + hw->hw_info.req_buf = NULL; +free_ep_info: + kfree(hw->ep_shm_info); + hw->ep_shm_info = NULL; + return result; } static void fjes_hw_cleanup(struct fjes_hw *hw) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 706ea5263e879..cd15d7b380ab5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -44,7 +44,7 @@ static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); -MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); unsigned int netvsc_ring_bytes __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -2805,7 +2805,7 @@ static int __init netvsc_drv_init(void) pr_info("Increased ring_size to %u (min allowed)\n", ring_size); } - netvsc_ring_bytes = ring_size * PAGE_SIZE; + netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096); register_netdevice_notifier(&netvsc_netdev_notifier); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dfd5f8e78e291..27ca25bbd1411 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -120,6 +120,11 @@ */ #define LAN8814_1PPM_FORMAT 17179 +#define PTP_RX_VERSION 0x0248 +#define PTP_TX_VERSION 0x0288 +#define PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8) +#define PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0)) + #define PTP_RX_MOD 0x024F #define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) #define PTP_RX_TIMESTAMP_EN 0x024D @@ -3125,6 +3130,12 @@ static void lan8814_ptp_init(struct phy_device *phydev) lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + /* Disable checking for minorVersionPTP field */ + lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + skb_queue_head_init(&ptp_priv->tx_queue); skb_queue_head_init(&ptp_priv->rx_queue); INIT_LIST_HEAD(&ptp_priv->rx_ts_list); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index afa5497f7c35c..4a4f8c8e79fa1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1630,13 +1630,19 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, switch (act) { case XDP_REDIRECT: err = xdp_do_redirect(tun->dev, xdp, xdp_prog); - if (err) + if (err) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); - if (err < 0) + if (err < 0) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_PASS: break; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index ef5baee6c9c52..1df60d51905b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include #include "iwl-drv.h" @@ -1094,7 +1094,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt, node_trig = (void *)node_tlv->data; } - memcpy(node_trig->data + offset, trig->data, trig_data_len); + memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); node_tlv->length = cpu_to_le32(size); if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 919cc53bc02e3..bceb27b1baa18 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1322,12 +1322,12 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) * value of the frequency. In such a case, do not abort but * configure the hardware to the desired frequency forcefully. */ - forced = opp_table->rate_clk_single != target_freq; + forced = opp_table->rate_clk_single != freq; } - ret = _set_opp(dev, opp_table, opp, &target_freq, forced); + ret = _set_opp(dev, opp_table, opp, &freq, forced); - if (target_freq) + if (freq) dev_pm_opp_put(opp); put_opp_table: diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 332bcc0053a5e..498bae2e3403c 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -238,7 +238,7 @@ static int __init power_init(void) if (running_on_qemu && soft_power_reg) register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_DEFAULT, qemu_power_off, (void *)soft_power_reg); - else + if (!running_on_qemu || soft_power_reg) power_task = kthread_run(kpowerswd, (void*)soft_power_reg, KTHREAD_NAME); if (IS_ERR(power_task)) { diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 33ab207493e3e..33bb58dc3f78c 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_data *data, unsigned int *min, unsigned static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max); static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq); -static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr); return sprintf(buf, "%u\n", data->domain_id); } -static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr); return sprintf(buf, "%u\n", data->cluster_id); } -static ssize_t show_package_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr); return sprintf(buf, "%u\n", data->package_id); } @@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf) } #define store_uncore_min_max(name, min_max) \ - static ssize_t store_##name(struct device *dev, \ - struct device_attribute *attr, \ + static ssize_t store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, \ const char *buf, size_t count) \ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return store_min_max_freq_khz(data, buf, count, \ min_max); \ } #define show_uncore_min_max(name, min_max) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_min_max_freq_khz(data, buf, min_max); \ } #define show_uncore_perf_status(name) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_perf_status_freq_khz(data, buf); \ } @@ -134,11 +134,11 @@ show_uncore_min_max(max_freq_khz, 1); show_uncore_perf_status(current_freq_khz); #define show_uncore_data(member_name) \ - static ssize_t show_##member_name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ struct uncore_data *data = container_of(attr, struct uncore_data,\ - member_name##_dev_attr);\ + member_name##_kobj_attr);\ \ return sysfs_emit(buf, "%u\n", \ data->member_name); \ @@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz); #define init_attribute_rw(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = store_##_name; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0644; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = store_##_name; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0644; \ } while (0) #define init_attribute_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0444; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0444; \ } while (0) #define init_attribute_root_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0400; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0400; \ } while (0) static int create_attr_group(struct uncore_data *data, char *name) @@ -186,21 +186,21 @@ static int create_attr_group(struct uncore_data *data, char *name) if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) { init_attribute_root_ro(domain_id); - data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr; init_attribute_root_ro(fabric_cluster_id); - data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr; init_attribute_root_ro(package_id); - data->uncore_attrs[index++] = &data->package_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr; } - data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; ret = uncore_read_freq(data, &freq); if (!ret) - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; data->uncore_attrs[index] = NULL; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h index 7afb69977c7e8..0e5bf507e5552 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h @@ -26,14 +26,14 @@ * @instance_id: Unique instance id to append to directory name * @name: Sysfs entry name for this instance * @uncore_attr_group: Attribute group storage - * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz - * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz - * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz - * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz - * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz - * @domain_id_dev_attr: Storage for device attribute domain_id - * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id - * @package_id_dev_attr: Storage for device attribute package_id + * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz + * @mix_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz + * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz + * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz + * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz + * @domain_id_kobj_attr: Storage for kobject attribute domain_id + * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id + * @package_id_kobj_attr: Storage for kobject attribute package_id * @uncore_attrs: Attribute storage for group creation * * This structure is used to encapsulate all data related to uncore sysfs @@ -53,14 +53,14 @@ struct uncore_data { char name[32]; struct attribute_group uncore_attr_group; - struct device_attribute max_freq_khz_dev_attr; - struct device_attribute min_freq_khz_dev_attr; - struct device_attribute initial_max_freq_khz_dev_attr; - struct device_attribute initial_min_freq_khz_dev_attr; - struct device_attribute current_freq_khz_dev_attr; - struct device_attribute domain_id_dev_attr; - struct device_attribute fabric_cluster_id_dev_attr; - struct device_attribute package_id_dev_attr; + struct kobj_attribute max_freq_khz_kobj_attr; + struct kobj_attribute min_freq_khz_kobj_attr; + struct kobj_attribute initial_max_freq_khz_kobj_attr; + struct kobj_attribute initial_min_freq_khz_kobj_attr; + struct kobj_attribute current_freq_khz_kobj_attr; + struct kobj_attribute domain_id_kobj_attr; + struct kobj_attribute fabric_cluster_id_kobj_attr; + struct kobj_attribute package_id_kobj_attr; struct attribute *uncore_attrs[9]; }; diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dde..17cc4b45e0239 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,49 +79,66 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; - - return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) { + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; struct pci_dev *pdev; - int ret; pdev = pci_scan_single_device(bus, devfn); if (!pdev) - return -ENODEV; + return; - ret = p2sb_read_bar0(pdev, mem); + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; pci_stop_and_remove_bus_device(pdev); - return ret; } -/** - * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR - * @bus: PCI bus to communicate with - * @devfn: PCI slot and function to communicate with - * @mem: memory resource to be filled in - * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. - * If @devfn is 0, it will be replaced by devfn of the P2SB device. - * - * Caller must provide a valid pointer to @mem. - * - * Locking is handled by pci_rescan_remove_lock mutex. - * - * Return: - * 0 on success or appropriate errno value on error. - */ -int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; + + return 0; +} + +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) +{ + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) { - struct pci_dev *pdev_p2sb; unsigned int devfn_p2sb; u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; int ret; /* Get devfn for P2SB device itself */ @@ -106,8 +146,17 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) if (ret) return ret; - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); + bus = p2sb_get_bus(NULL); + if (!bus) + return -ENODEV; + + /* + * When a device with same devfn exists and its device class is not + * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. + */ + pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); + if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) + return -ENODEV; /* * Prevent concurrent PCI bus scan from seeing the P2SB device and @@ -115,17 +164,16 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) */ pci_lock_rescan_remove(); - /* Unhide the P2SB device, if needed */ + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); if (value & P2SBC_HIDE) pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); + ret = p2sb_scan_and_cache(bus, devfn_p2sb); /* Hide the P2SB device, if it was hidden */ if (value & P2SBC_HIDE) @@ -133,12 +181,62 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) pci_unlock_rescan_remove(); - if (ret) - return ret; + return ret; +} + +/** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with + * @devfn: PCI slot and function to communicate with + * @mem: memory resource to be filled in + * + * If @bus is NULL, the bus 0 in domain 0 will be used. + * If @devfn is 0, it will be replaced by devfn of the P2SB device. + * + * Caller must provide a valid pointer to @mem. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +{ + struct p2sb_res_cache *cache; + int ret; + + bus = p2sb_get_bus(bus); + if (!bus) + return -ENODEV; + + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } - if (mem->flags == 0) + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) return -ENODEV; + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 905ac7910c98f..f1af0f6746150 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -378,6 +378,7 @@ static void virtio_rpmsg_release_device(struct device *dev) struct rpmsg_device *rpdev = to_rpmsg_device(dev); struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev); + kfree(rpdev->driver_override); kfree(vch); } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 228fb2d11c709..7d99cd2c37a0b 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -231,7 +231,7 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - ret = mc146818_get_time(t); + ret = mc146818_get_time(t, 1000); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); return ret; @@ -292,7 +292,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) - return -EIO; + return -ETIMEDOUT; /* Basic alarms only support hour, minute, and seconds fields. * Some also support day and month, for alarms up to a year in @@ -307,7 +307,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use the mc146818_avoid_UIP() function to avoid this. */ - if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) + if (!mc146818_avoid_UIP(cmos_read_alarm_callback, 10, &p)) return -EIO; if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -556,8 +556,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use mc146818_avoid_UIP() to avoid this. */ - if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) - return -EIO; + if (!mc146818_avoid_UIP(cmos_set_alarm_callback, 10, &p)) + return -ETIMEDOUT; cmos->alarm_expires = rtc_tm_to_time64(&t->time); @@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev) } #ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ static void use_acpi_alarm_quirks(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (dmi_get_bios_year() < 2015) + return; + break; + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (dmi_get_bios_year() < 2021) + return; + break; + default: return; - + } if (!is_hpet_enabled()) return; - if (dmi_get_bios_year() < 2015) - return; - use_acpi_alarm = true; } #else diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index f1c09f1db044c..651bf3c279c74 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,26 +8,31 @@ #include #endif +#define UIP_RECHECK_DELAY 100 /* usec */ +#define UIP_RECHECK_DELAY_MS (USEC_PER_MSEC / UIP_RECHECK_DELAY) +#define UIP_RECHECK_LOOPS_MS(x) (x / UIP_RECHECK_DELAY_MS) + /* * Execute a function while the UIP (Update-in-progress) bit of the RTC is - * unset. + * unset. The timeout is configurable by the caller in ms. * * Warning: callback may be executed more then once. */ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param) { int i; unsigned long flags; unsigned char seconds; - for (i = 0; i < 100; i++) { + for (i = 0; UIP_RECHECK_LOOPS_MS(i) < timeout; i++) { spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll - * every 100 usec for completion. + * for completion. * * Store the second value before checking UIP so a long lasting * NMI which happens to hit after the UIP check cannot make @@ -37,7 +42,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -56,7 +61,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), */ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -72,6 +77,10 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), } spin_unlock_irqrestore(&rtc_lock, flags); + if (UIP_RECHECK_LOOPS_MS(i) >= 100) + pr_warn("Reading current time from RTC took around %li ms\n", + UIP_RECHECK_LOOPS_MS(i)); + return true; } return false; @@ -84,7 +93,7 @@ EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); */ bool mc146818_does_rtc_work(void) { - return mc146818_avoid_UIP(NULL, NULL); + return mc146818_avoid_UIP(NULL, 1000, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); @@ -130,15 +139,27 @@ static void mc146818_get_time_callback(unsigned char seconds, void *param_in) p->ctrl = CMOS_READ(RTC_CONTROL); } -int mc146818_get_time(struct rtc_time *time) +/** + * mc146818_get_time - Get the current time from the RTC + * @time: pointer to struct rtc_time to store the current time + * @timeout: timeout value in ms + * + * This function reads the current time from the RTC and stores it in the + * provided struct rtc_time. The timeout parameter specifies the maximum + * time to wait for the RTC to become ready. + * + * Return: 0 on success, -ETIMEDOUT if the RTC did not become ready within + * the specified timeout, or another error code if an error occurred. + */ +int mc146818_get_time(struct rtc_time *time, int timeout) { struct mc146818_get_time_callback_param p = { .time = time }; - if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) { + if (!mc146818_avoid_UIP(mc146818_get_time_callback, timeout, &p)) { memset(time, 0, sizeof(*time)); - return -EIO; + return -ETIMEDOUT; } if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 4db538a551925..88f41f95cc94d 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -32,7 +32,8 @@ #define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */ -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); +static int vfio_ap_mdev_reset_qlist(struct list_head *qlist); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static const struct vfio_device_ops vfio_ap_matrix_dev_ops; static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); @@ -457,6 +458,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", __func__, nisc, isc, q->apqn); + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); status.response_code = AP_RESPONSE_INVALID_GISA; return status; } @@ -661,17 +663,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) * device driver. * * @matrix_mdev: the matrix mdev whose matrix is to be filtered. + * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the + * guest's AP configuration that are still in the host's AP + * configuration. * * Note: If an APQN referencing a queue device that is not bound to the vfio_ap * driver, its APID will be filtered from the guest's APCB. The matrix * structure precludes filtering an individual APQN, so its APID will be - * filtered. + * filtered. Consequently, all queues associated with the adapter that + * are in the host's AP configuration must be reset. If queues are + * subsequently made available again to the guest, they should re-appear + * in a reset state * * Return: a boolean value indicating whether the KVM guest's APCB was changed * by the filtering or not. */ -static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, - struct ap_matrix_mdev *matrix_mdev) +static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_filtered) { unsigned long apid, apqi, apqn; DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); @@ -681,6 +689,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES); bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS); vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); + bitmap_clear(apm_filtered, 0, AP_DEVICES); /* * Copy the adapters, domains and control domains to the shadow_apcb @@ -692,8 +701,9 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's @@ -705,8 +715,16 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, apqn = AP_MKQID(apid, apqi); q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); if (!q || q->reset_status.response_code) { - clear_bit_inv(apid, - matrix_mdev->shadow_apcb.apm); + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); + + /* + * If the adapter was previously plugged into + * the guest, let's let the caller know that + * the APID was filtered. + */ + if (test_bit_inv(apid, prev_shadow_apm)) + set_bit_inv(apid, apm_filtered); + break; } } @@ -808,7 +826,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev) mutex_lock(&matrix_dev->guests_lock); mutex_lock(&matrix_dev->mdevs_lock); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); vfio_ap_mdev_unlink_fr_queues(matrix_mdev); list_del(&matrix_mdev->node); mutex_unlock(&matrix_dev->mdevs_lock); @@ -918,6 +936,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev, AP_MKQID(apid, apqi)); } +static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid, + struct list_head *qlist) +{ + struct vfio_ap_queue *q; + unsigned long apqi; + + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) { + q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi)); + if (q) + list_add_tail(&q->reset_qnode, qlist); + } +} + +static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + struct list_head qlist; + + INIT_LIST_HEAD(&qlist); + collect_queues_to_reset(matrix_mdev, apid, &qlist); + vfio_ap_mdev_reset_qlist(&qlist); +} + +static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_reset) +{ + struct list_head qlist; + unsigned long apid; + + if (bitmap_empty(apm_reset, AP_DEVICES)) + return 0; + + INIT_LIST_HEAD(&qlist); + + for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) + collect_queues_to_reset(matrix_mdev, apid, &qlist); + + return vfio_ap_mdev_reset_qlist(&qlist); +} + /** * assign_adapter_store - parses the APID from @buf and sets the * corresponding bit in the mediated matrix device's APM @@ -958,7 +1017,7 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - DECLARE_BITMAP(apm_delta, AP_DEVICES); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -987,12 +1046,11 @@ static ssize_t assign_adapter_store(struct device *dev, } vfio_ap_mdev_link_adapter(matrix_mdev, apid); - memset(apm_delta, 0, sizeof(apm_delta)); - set_bit_inv(apid, apm_delta); - if (vfio_ap_mdev_filter_matrix(apm_delta, - matrix_mdev->matrix.aqm, matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1023,11 +1081,12 @@ static struct vfio_ap_queue * adapter was assigned. * @matrix_mdev: the matrix mediated device to which the adapter was assigned. * @apid: the APID of the unassigned adapter. - * @qtable: table for storing queues associated with unassigned adapter. + * @qlist: list for storing queues associated with unassigned adapter that + * need to be reset. */ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apqi; struct vfio_ap_queue *q; @@ -1035,11 +1094,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + list_add_tail(&q->reset_qnode, qlist); } } } @@ -1047,26 +1105,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable); + INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist); if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1167,7 +1222,7 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - DECLARE_BITMAP(aqm_delta, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -1196,12 +1251,11 @@ static ssize_t assign_domain_store(struct device *dev, } vfio_ap_mdev_link_domain(matrix_mdev, apqi); - memset(aqm_delta, 0, sizeof(aqm_delta)); - set_bit_inv(apqi, aqm_delta); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1214,7 +1268,7 @@ static DEVICE_ATTR_WO(assign_domain); static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apid; struct vfio_ap_queue *q; @@ -1222,11 +1276,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + list_add_tail(&q->reset_qnode, qlist); } } } @@ -1234,26 +1287,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable); + INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist); if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1608,7 +1658,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) get_update_locks_for_kvm(kvm); kvm_arch_crypto_clear_masks(kvm); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; @@ -1744,15 +1794,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) } } -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) { int ret = 0, loop_cursor; struct vfio_ap_queue *q; - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) vfio_ap_mdev_reset_queue(q); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) { + flush_work(&q->reset_work); + + if (q->reset_status.response_code) + ret = -EIO; + } + + return ret; +} + +static int vfio_ap_mdev_reset_qlist(struct list_head *qlist) +{ + int ret = 0; + struct vfio_ap_queue *q; + + list_for_each_entry(q, qlist, reset_qnode) + vfio_ap_mdev_reset_queue(q); + + list_for_each_entry(q, qlist, reset_qnode) { flush_work(&q->reset_work); if (q->reset_status.response_code) @@ -1938,7 +2006,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, ret = vfio_ap_mdev_get_device_info(arg); break; case VFIO_DEVICE_RESET: - ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + ret = vfio_ap_mdev_reset_queues(matrix_mdev); break; case VFIO_DEVICE_GET_IRQ_INFO: ret = vfio_ap_get_irq_info(arg); @@ -2070,6 +2138,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) { int ret; struct vfio_ap_queue *q; + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev; ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2091,15 +2160,28 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev)) + /* + * If we're in the process of handling the adding of adapters or + * domains to the host's AP configuration, then let the + * vfio_ap device driver's on_scan_complete callback filter the + * matrix and update the guest's AP configuration after all of + * the new queue devices are probed. + */ + if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) || + !bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS)) + goto done; + + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } } + +done: dev_set_drvdata(&apdev->device, q); release_update_locks_for_mdev(matrix_mdev); - return 0; + return ret; err_remove_group: sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2116,26 +2198,40 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) q = dev_get_drvdata(&apdev->device); get_update_locks_for_queue(q); matrix_mdev = q->matrix_mdev; + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); if (matrix_mdev) { - vfio_ap_unlink_queue_fr_mdev(q); - - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - - /* - * If the queue is assigned to the guest's APCB, then remove - * the adapter's APID from the APCB and hot it into the guest. - */ + /* If the queue is assigned to the guest's AP configuration */ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { + /* + * Since the queues are defined via a matrix of adapters + * and domains, it is not possible to hot unplug a + * single queue; so, let's unplug the adapter. + */ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apid(matrix_mdev, apid); + goto done; } } - vfio_ap_mdev_reset_queue(q); - flush_work(&q->reset_work); + /* + * If the queue is not in the host's AP configuration, then resetting + * it will fail with response code 01, (APQN not valid); so, let's make + * sure it is in the host's config. + */ + if (test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm) && + test_bit_inv(apqi, (unsigned long *)matrix_dev->info.aqm)) { + vfio_ap_mdev_reset_queue(q); + flush_work(&q->reset_work); + } + +done: + if (matrix_mdev) + vfio_ap_unlink_queue_fr_mdev(q); + dev_set_drvdata(&apdev->device, NULL); kfree(q); release_update_locks_for_mdev(matrix_mdev); @@ -2443,39 +2539,30 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) { - bool do_hotplug = false; - int filter_domains = 0; - int filter_adapters = 0; - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); + bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false; mutex_lock(&matrix_mdev->kvm->lock); mutex_lock(&matrix_dev->mdevs_lock); - filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, - matrix_mdev->apm_add, AP_DEVICES); - filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, - matrix_mdev->aqm_add, AP_DOMAINS); - - if (filter_adapters && filter_domains) - do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); - else if (filter_adapters) - do_hotplug |= - vfio_ap_mdev_filter_matrix(apm, - matrix_mdev->shadow_apcb.aqm, - matrix_mdev); - else - do_hotplug |= - vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, - aqm, matrix_mdev); + filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm, + matrix_mdev->adm_add, AP_DOMAINS); - if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, - AP_DOMAINS)) + if (filter_adapters || filter_domains) + do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered); + + if (filter_cdoms) do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); if (do_hotplug) vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&matrix_mdev->kvm->lock); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 88aff8b81f2fc..98d37aa27044a 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -133,6 +133,8 @@ struct ap_matrix_mdev { * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable + * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues + * that need to be reset * @reset_status: the status from the last reset of the queue * @reset_work: work to wait for queue reset to complete */ @@ -143,6 +145,7 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; struct hlist_node mdev_qnode; + struct list_head reset_qnode; struct ap_queue_status reset_status; struct work_struct reset_work; }; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 1223d34c04da3..d983f4a0e9f14 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2196,15 +2196,18 @@ void scsi_eh_flush_done_q(struct list_head *done_q) struct scsi_cmnd *scmd, *next; list_for_each_entry_safe(scmd, next, done_q, eh_entry) { + struct scsi_device *sdev = scmd->device; + list_del_init(&scmd->eh_entry); - if (scsi_device_online(scmd->device) && - !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) && - scsi_eh_should_retry_cmd(scmd)) { + if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) && + scsi_cmd_retry_allowed(scmd) && + scsi_eh_should_retry_cmd(scmd)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: flush retry cmd\n", current->comm)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + blk_mq_kick_requeue_list(sdev->request_queue); } else { /* * If just we got sense for the device (called diff --git a/drivers/soc/fsl/qe/qmc.c b/drivers/soc/fsl/qe/qmc.c index b3c292c9a14ea..8dc73cc1a83b1 100644 --- a/drivers/soc/fsl/qe/qmc.c +++ b/drivers/soc/fsl/qe/qmc.c @@ -175,7 +175,7 @@ struct qmc_chan { struct list_head list; unsigned int id; struct qmc *qmc; - void *__iomem s_param; + void __iomem *s_param; enum qmc_mode mode; u64 tx_ts_mask; u64 rx_ts_mask; @@ -203,9 +203,9 @@ struct qmc_chan { struct qmc { struct device *dev; struct tsa_serial *tsa_serial; - void *__iomem scc_regs; - void *__iomem scc_pram; - void *__iomem dpram; + void __iomem *scc_regs; + void __iomem *scc_pram; + void __iomem *dpram; u16 scc_pram_offset; cbd_t __iomem *bd_table; dma_addr_t bd_dma_addr; @@ -218,37 +218,37 @@ struct qmc { struct qmc_chan *chans[64]; }; -static inline void qmc_write16(void *__iomem addr, u16 val) +static inline void qmc_write16(void __iomem *addr, u16 val) { iowrite16be(val, addr); } -static inline u16 qmc_read16(void *__iomem addr) +static inline u16 qmc_read16(void __iomem *addr) { return ioread16be(addr); } -static inline void qmc_setbits16(void *__iomem addr, u16 set) +static inline void qmc_setbits16(void __iomem *addr, u16 set) { qmc_write16(addr, qmc_read16(addr) | set); } -static inline void qmc_clrbits16(void *__iomem addr, u16 clr) +static inline void qmc_clrbits16(void __iomem *addr, u16 clr) { qmc_write16(addr, qmc_read16(addr) & ~clr); } -static inline void qmc_write32(void *__iomem addr, u32 val) +static inline void qmc_write32(void __iomem *addr, u32 val) { iowrite32be(val, addr); } -static inline u32 qmc_read32(void *__iomem addr) +static inline u32 qmc_read32(void __iomem *addr) { return ioread32be(addr); } -static inline void qmc_setbits32(void *__iomem addr, u32 set) +static inline void qmc_setbits32(void __iomem *addr, u32 set) { qmc_write32(addr, qmc_read32(addr) | set); } @@ -318,7 +318,7 @@ int qmc_chan_write_submit(struct qmc_chan *chan, dma_addr_t addr, size_t length, { struct qmc_xfer_desc *xfer_desc; unsigned long flags; - cbd_t *__iomem bd; + cbd_t __iomem *bd; u16 ctrl; int ret; @@ -374,7 +374,7 @@ static void qmc_chan_write_done(struct qmc_chan *chan) void (*complete)(void *context); unsigned long flags; void *context; - cbd_t *__iomem bd; + cbd_t __iomem *bd; u16 ctrl; /* @@ -425,7 +425,7 @@ int qmc_chan_read_submit(struct qmc_chan *chan, dma_addr_t addr, size_t length, { struct qmc_xfer_desc *xfer_desc; unsigned long flags; - cbd_t *__iomem bd; + cbd_t __iomem *bd; u16 ctrl; int ret; @@ -488,7 +488,7 @@ static void qmc_chan_read_done(struct qmc_chan *chan) void (*complete)(void *context, size_t size); struct qmc_xfer_desc *xfer_desc; unsigned long flags; - cbd_t *__iomem bd; + cbd_t __iomem *bd; void *context; u16 datalen; u16 ctrl; @@ -663,7 +663,7 @@ static void qmc_chan_reset_rx(struct qmc_chan *chan) { struct qmc_xfer_desc *xfer_desc; unsigned long flags; - cbd_t *__iomem bd; + cbd_t __iomem *bd; u16 ctrl; spin_lock_irqsave(&chan->rx_lock, flags); @@ -685,7 +685,6 @@ static void qmc_chan_reset_rx(struct qmc_chan *chan) qmc_read16(chan->s_param + QMC_SPE_RBASE)); chan->rx_pending = 0; - chan->is_rx_stopped = false; spin_unlock_irqrestore(&chan->rx_lock, flags); } @@ -694,7 +693,7 @@ static void qmc_chan_reset_tx(struct qmc_chan *chan) { struct qmc_xfer_desc *xfer_desc; unsigned long flags; - cbd_t *__iomem bd; + cbd_t __iomem *bd; u16 ctrl; spin_lock_irqsave(&chan->tx_lock, flags); diff --git a/drivers/soc/fsl/qe/tsa.c b/drivers/soc/fsl/qe/tsa.c index 3646153117b38..e0527b9efd050 100644 --- a/drivers/soc/fsl/qe/tsa.c +++ b/drivers/soc/fsl/qe/tsa.c @@ -98,9 +98,9 @@ #define TSA_SIRP 0x10 struct tsa_entries_area { - void *__iomem entries_start; - void *__iomem entries_next; - void *__iomem last_entry; + void __iomem *entries_start; + void __iomem *entries_next; + void __iomem *last_entry; }; struct tsa_tdm { @@ -117,8 +117,8 @@ struct tsa_tdm { struct tsa { struct device *dev; - void *__iomem si_regs; - void *__iomem si_ram; + void __iomem *si_regs; + void __iomem *si_ram; resource_size_t si_ram_sz; spinlock_t lock; int tdms; /* TSA_TDMx ORed */ @@ -135,27 +135,27 @@ static inline struct tsa *tsa_serial_get_tsa(struct tsa_serial *tsa_serial) return container_of(tsa_serial, struct tsa, serials[tsa_serial->id]); } -static inline void tsa_write32(void *__iomem addr, u32 val) +static inline void tsa_write32(void __iomem *addr, u32 val) { iowrite32be(val, addr); } -static inline void tsa_write8(void *__iomem addr, u32 val) +static inline void tsa_write8(void __iomem *addr, u32 val) { iowrite8(val, addr); } -static inline u32 tsa_read32(void *__iomem addr) +static inline u32 tsa_read32(void __iomem *addr) { return ioread32be(addr); } -static inline void tsa_clrbits32(void *__iomem addr, u32 clr) +static inline void tsa_clrbits32(void __iomem *addr, u32 clr) { tsa_write32(addr, tsa_read32(addr) & ~clr); } -static inline void tsa_clrsetbits32(void *__iomem addr, u32 clr, u32 set) +static inline void tsa_clrsetbits32(void __iomem *addr, u32 clr, u32 set) { tsa_write32(addr, (tsa_read32(addr) & ~clr) | set); } @@ -313,7 +313,7 @@ static u32 tsa_serial_id2csel(struct tsa *tsa, u32 serial_id) static int tsa_add_entry(struct tsa *tsa, struct tsa_entries_area *area, u32 count, u32 serial_id) { - void *__iomem addr; + void __iomem *addr; u32 left; u32 val; u32 cnt; diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 6f8b2f7ae3cc1..9b0000b5f064c 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -285,7 +285,7 @@ static void pmic_glink_altmode_sc8180xp_notify(struct pmic_glink_altmode *altmod svid = mux == 2 ? USB_TYPEC_DP_SID : 0; - if (!altmode->ports[port].altmode) { + if (port >= ARRAY_SIZE(altmode->ports) || !altmode->ports[port].altmode) { dev_dbg(altmode->dev, "notification on undefined port %d\n", port); return; } @@ -328,7 +328,7 @@ static void pmic_glink_altmode_sc8280xp_notify(struct pmic_glink_altmode *altmod hpd_state = FIELD_GET(SC8280XP_HPD_STATE_MASK, notify->payload[8]); hpd_irq = FIELD_GET(SC8280XP_HPD_IRQ_MASK, notify->payload[8]); - if (!altmode->ports[port].altmode) { + if (port >= ARRAY_SIZE(altmode->ports) || !altmode->ports[port].altmode) { dev_dbg(altmode->dev, "notification on undefined port %d\n", port); return; } diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 3a99f6dcdfafa..a3b1f4e6f0f90 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -927,6 +927,14 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) amd_manager->bus.clk_stop_timeout = 200; amd_manager->bus.link_id = amd_manager->instance; + /* + * Due to BIOS compatibility, the two links are exposed within + * the scope of a single controller. If this changes, the + * controller_id will have to be updated with drv_data + * information. + */ + amd_manager->bus.controller_id = 0; + switch (amd_manager->instance) { case ACP_SDW0: amd_manager->num_dout_ports = AMD_SDW0_MAX_TX_PORTS; diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 0e7bc3c40f9df..e7553c38be59d 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -22,6 +22,10 @@ static int sdw_get_id(struct sdw_bus *bus) return rc; bus->id = rc; + + if (bus->controller_id == -1) + bus->controller_id = rc; + return 0; } diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index d1553cb771874..67abd7e52f092 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -20,7 +20,7 @@ void sdw_bus_debugfs_init(struct sdw_bus *bus) return; /* create the debugfs master-N */ - snprintf(name, sizeof(name), "master-%d-%d", bus->id, bus->link_id); + snprintf(name, sizeof(name), "master-%d-%d", bus->controller_id, bus->link_id); bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root); } diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 7f15e3549e539..93698532deac4 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -234,6 +234,9 @@ static int intel_link_probe(struct auxiliary_device *auxdev, cdns->instance = sdw->instance; cdns->msg_count = 0; + /* single controller for all SoundWire links */ + bus->controller_id = 0; + bus->link_id = auxdev->id; bus->clk_stop_timeout = 1; diff --git a/drivers/soundwire/master.c b/drivers/soundwire/master.c index 9b05c9e25ebe4..51abedbbaa663 100644 --- a/drivers/soundwire/master.c +++ b/drivers/soundwire/master.c @@ -145,7 +145,7 @@ int sdw_master_device_add(struct sdw_bus *bus, struct device *parent, md->dev.fwnode = fwnode; md->dev.dma_mask = parent->dma_mask; - dev_set_name(&md->dev, "sdw-master-%d", bus->id); + dev_set_name(&md->dev, "sdw-master-%d-%d", bus->controller_id, bus->link_id); ret = device_register(&md->dev); if (ret) { diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 55be9f4b8d59a..e3ae4e4e07ac5 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1612,6 +1612,9 @@ static int qcom_swrm_probe(struct platform_device *pdev) } } + /* FIXME: is there a DT-defined value to use ? */ + ctrl->bus.controller_id = -1; + ret = sdw_bus_master_add(&ctrl->bus, dev, dev->fwnode); if (ret) { dev_err(dev, "Failed to register Soundwire controller (%d)\n", diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index c1c1a2ac293af..060c2982e26b0 100644 --- a/drivers/soundwire/slave.c +++ b/drivers/soundwire/slave.c @@ -39,14 +39,14 @@ int sdw_slave_add(struct sdw_bus *bus, slave->dev.fwnode = fwnode; if (id->unique_id == SDW_IGNORED_UNIQUE_ID) { - /* name shall be sdw:link:mfg:part:class */ - dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x", - bus->link_id, id->mfg_id, id->part_id, + /* name shall be sdw:ctrl:link:mfg:part:class */ + dev_set_name(&slave->dev, "sdw:%01x:%01x:%04x:%04x:%02x", + bus->controller_id, bus->link_id, id->mfg_id, id->part_id, id->class_id); } else { - /* name shall be sdw:link:mfg:part:class:unique */ - dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x:%01x", - bus->link_id, id->mfg_id, id->part_id, + /* name shall be sdw:ctrl:link:mfg:part:class:unique */ + dev_set_name(&slave->dev, "sdw:%01x:%01x:%04x:%04x:%02x:%01x", + bus->controller_id, bus->link_id, id->mfg_id, id->part_id, id->class_id, id->unique_id); } diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index ef08fcac2f6da..0407b91183caa 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include "spi-bcm-qspi.h" @@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, /* non-aligned and very short transfers are handled by MSPI */ if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) || - len < 4) + len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP) mspi_read = true; if (!has_bspi(qspi) || mspi_read) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index bd96d8b546cdb..5cab7caf46586 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -317,6 +317,15 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) xspi->rx_bytes -= nrx; while (ntx || nrx) { + if (nrx) { + u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); + + if (xspi->rxbuf) + *xspi->rxbuf++ = data; + + nrx--; + } + if (ntx) { if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); @@ -326,14 +335,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) ntx--; } - if (nrx) { - u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); - - if (xspi->rxbuf) - *xspi->rxbuf++ = data; - - nrx--; - } } } diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 57d767a68e7b2..b9918dcc38027 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -84,7 +84,6 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, - { PCI_VDEVICE(INTEL, 0xae23), (unsigned long)&cnl_info }, { }, }; MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 399e81d37b3ba..1e08cd571d21a 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1624,6 +1624,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr, pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); + + msg->status = ret; + spi_finalize_current_message(ctlr); + return ret; } } diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 1b121066521ff..49cdfaa3a9279 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,28 +13,21 @@ #include "thermal_core.h" -static int thermal_zone_trip_update(struct thermal_zone_device *tz, int trip_id) +static int thermal_zone_trip_update(struct thermal_zone_device *tz, int trip_index) { - struct thermal_trip trip; + const struct thermal_trip *trip = &tz->trips[trip_index]; struct thermal_instance *instance; - int ret; - - ret = __thermal_zone_get_trip(tz, trip_id, &trip); - if (ret) { - pr_warn_once("Failed to retrieve trip point %d\n", trip_id); - return ret; - } - if (!trip.hysteresis) + if (!trip->hysteresis) dev_info_once(&tz->device, "Zero hysteresis value for thermal zone %s\n", tz->type); dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n", - trip_id, trip.temperature, tz->temperature, - trip.hysteresis); + trip_index, trip->temperature, tz->temperature, + trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip_id) + if (instance->trip != trip) continue; /* in case fan is in initial state, switch the fan off */ @@ -52,10 +45,10 @@ static int thermal_zone_trip_update(struct thermal_zone_device *tz, int trip_id) * enable fan when temperature exceeds trip_temp and disable * the fan in case it falls below trip_temp minus hysteresis */ - if (instance->target == 0 && tz->temperature >= trip.temperature) + if (instance->target == 0 && tz->temperature >= trip->temperature) instance->target = 1; else if (instance->target == 1 && - tz->temperature <= trip.temperature - trip.hysteresis) + tz->temperature <= trip->temperature - trip->hysteresis) instance->target = 0; dev_dbg(&instance->cdev->device, "target=%d\n", diff --git a/drivers/thermal/gov_fair_share.c b/drivers/thermal/gov_fair_share.c index 03c2daeb6ee8b..2abeb8979f500 100644 --- a/drivers/thermal/gov_fair_share.c +++ b/drivers/thermal/gov_fair_share.c @@ -49,7 +49,7 @@ static long get_target_state(struct thermal_zone_device *tz, /** * fair_share_throttle - throttles devices associated with the given zone * @tz: thermal_zone_device - * @trip: trip point index + * @trip_index: trip point index * * Throttling Logic: This uses three parameters to calculate the new * throttle state of the cooling devices associated with the given zone. @@ -65,8 +65,9 @@ static long get_target_state(struct thermal_zone_device *tz, * (Heavily assumes the trip points are in ascending order) * new_state of cooling device = P3 * P2 * P1 */ -static int fair_share_throttle(struct thermal_zone_device *tz, int trip) +static int fair_share_throttle(struct thermal_zone_device *tz, int trip_index) { + const struct thermal_trip *trip = &tz->trips[trip_index]; struct thermal_instance *instance; int total_weight = 0; int total_instance = 0; diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 8642f1096b91c..fc969642f70b7 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -90,12 +90,14 @@ static u32 estimate_sustainable_power(struct thermal_zone_device *tz) u32 sustainable_power = 0; struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip *trip_max_desired_temperature = + &tz->trips[params->trip_max_desired_temperature]; list_for_each_entry(instance, &tz->thermal_instances, tz_node) { struct thermal_cooling_device *cdev = instance->cdev; u32 min_power; - if (instance->trip != params->trip_max_desired_temperature) + if (instance->trip != trip_max_desired_temperature) continue; if (!cdev_is_power_actor(cdev)) @@ -383,12 +385,13 @@ static int allocate_power(struct thermal_zone_device *tz, { struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip *trip_max_desired_temperature = + &tz->trips[params->trip_max_desired_temperature]; u32 *req_power, *max_power, *granted_power, *extra_actor_power; u32 *weighted_req_power; u32 total_req_power, max_allocatable_power, total_weighted_req_power; u32 total_granted_power, power_range; int i, num_actors, total_weight, ret = 0; - int trip_max_desired_temperature = params->trip_max_desired_temperature; num_actors = 0; total_weight = 0; @@ -564,12 +567,14 @@ static void allow_maximum_power(struct thermal_zone_device *tz, bool update) { struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; + const struct thermal_trip *trip_max_desired_temperature = + &tz->trips[params->trip_max_desired_temperature]; u32 req_power; list_for_each_entry(instance, &tz->thermal_instances, tz_node) { struct thermal_cooling_device *cdev = instance->cdev; - if ((instance->trip != params->trip_max_desired_temperature) || + if ((instance->trip != trip_max_desired_temperature) || (!cdev_is_power_actor(instance->cdev))) continue; @@ -710,7 +715,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id) ret = __thermal_zone_get_trip(tz, params->trip_switch_on, &trip); if (!ret && (tz->temperature < trip.temperature)) { - update = (tz->last_temperature >= trip.temperature); + update = tz->passive; tz->passive = 0; reset_pid_controller(params); allow_maximum_power(tz, update); diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index 1050fb4d94c2d..849dc1ec8d27c 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -81,26 +81,24 @@ static void update_passive_instance(struct thermal_zone_device *tz, static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip_id) { + const struct thermal_trip *trip = &tz->trips[trip_id]; enum thermal_trend trend; struct thermal_instance *instance; - struct thermal_trip trip; bool throttle = false; int old_target; - __thermal_zone_get_trip(tz, trip_id, &trip); - trend = get_tz_trend(tz, trip_id); - if (tz->temperature >= trip.temperature) { + if (tz->temperature >= trip->temperature) { throttle = true; - trace_thermal_zone_trip(tz, trip_id, trip.type); + trace_thermal_zone_trip(tz, trip_id, trip->type); } dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n", - trip_id, trip.type, trip.temperature, trend, throttle); + trip_id, trip->type, trip->temperature, trend, throttle); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip_id) + if (instance->trip != trip) continue; old_target = instance->target; @@ -114,11 +112,11 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip_id /* Activate a passive thermal instance */ if (old_target == THERMAL_NO_TARGET && instance->target != THERMAL_NO_TARGET) - update_passive_instance(tz, trip.type, 1); + update_passive_instance(tz, trip->type, 1); /* Deactivate a passive thermal instance */ else if (old_target != THERMAL_NO_TARGET && instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip.type, -1); + update_passive_instance(tz, trip->type, -1); instance->initialized = true; mutex_lock(&instance->cdev->lock); diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index c69db6c90869c..1c5a429b2e3e9 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,9 @@ #include #include #include +#include #include +#include #include #include @@ -347,6 +350,52 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance) hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; } +/* Caller must hold hfi_instance_lock. */ +static void hfi_enable(void) +{ + u64 msr_val; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +} + +static void hfi_set_hw_table(struct hfi_instance *hfi_instance) +{ + phys_addr_t hw_table_pa; + u64 msr_val; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); +} + +/* Caller must hold hfi_instance_lock. */ +static void hfi_disable(void) +{ + u64 msr_val; + int i; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + + /* + * Wait for hardware to acknowledge the disabling of HFI. Some + * processors may not do it. Wait for ~2ms. This is a reasonable + * time for hardware to complete any pending actions on the HFI + * memory. + */ + for (i = 0; i < 2000; i++) { + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED) + break; + + udelay(1); + cpu_relax(); + } +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -364,8 +413,6 @@ void intel_hfi_online(unsigned int cpu) { struct hfi_instance *hfi_instance; struct hfi_cpu_info *info; - phys_addr_t hw_table_pa; - u64 msr_val; u16 die_id; /* Nothing to do if hfi_instances are missing. */ @@ -403,14 +450,16 @@ void intel_hfi_online(unsigned int cpu) /* * Hardware is programmed with the physical address of the first page * frame of the table. Hence, the allocated memory must be page-aligned. + * + * Some processors do not forget the initial address of the HFI table + * even after having been reprogrammed. Keep using the same pages. Do + * not free them. */ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, GFP_KERNEL | __GFP_ZERO); if (!hfi_instance->hw_table) goto unlock; - hw_table_pa = virt_to_phys(hfi_instance->hw_table); - /* * Allocate memory to keep a local copy of the table that * hardware generates. @@ -420,16 +469,6 @@ void intel_hfi_online(unsigned int cpu) if (!hfi_instance->local_table) goto free_hw_table; - /* - * Program the address of the feedback table of this die/package. On - * some processors, hardware remembers the old address of the HFI table - * even after having been reprogrammed and re-enabled. Thus, do not free - * the pages allocated for the table or reprogram the hardware with a - * new base address. Namely, program the hardware only once. - */ - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); - init_hfi_instance(hfi_instance); INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); @@ -438,13 +477,8 @@ void intel_hfi_online(unsigned int cpu) cpumask_set_cpu(cpu, hfi_instance->cpus); - /* - * Enable the hardware feedback interface and never disable it. See - * comment on programming the address of the table. - */ - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + hfi_set_hw_table(hfi_instance); + hfi_enable(); unlock: mutex_unlock(&hfi_instance_lock); @@ -484,6 +518,10 @@ void intel_hfi_offline(unsigned int cpu) mutex_lock(&hfi_instance_lock); cpumask_clear_cpu(cpu, hfi_instance->cpus); + + if (!cpumask_weight(hfi_instance->cpus)) + hfi_disable(); + mutex_unlock(&hfi_instance_lock); } @@ -532,6 +570,30 @@ static __init int hfi_parse_features(void) return 0; } +static void hfi_do_enable(void) +{ + /* This code runs only on the boot CPU. */ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* No locking needed. There is no concurrency with CPU online. */ + hfi_set_hw_table(hfi_instance); + hfi_enable(); +} + +static int hfi_do_disable(void) +{ + /* No locking needed. There is no concurrency with CPU offline. */ + hfi_disable(); + + return 0; +} + +static struct syscore_ops hfi_pm_ops = { + .resume = hfi_do_enable, + .suspend = hfi_do_disable, +}; + void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; @@ -563,6 +625,8 @@ void __init intel_hfi_init(void) if (!hfi_updates_wq) goto err_nomem; + register_syscore_ops(&hfi_pm_ops); + return; err_nomem: diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2de524fb7be55..1494ffa597547 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -606,7 +606,7 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id) /** * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone * @tz: pointer to struct thermal_zone_device - * @trip: indicates which trip point the cooling devices is + * @trip_index: indicates which trip point the cooling devices is * associated with in this thermal zone. * @cdev: pointer to struct thermal_cooling_device * @upper: the Maximum cooling state for this trip point. @@ -626,7 +626,7 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id) * Return: 0 on success, the proper error value otherwise. */ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, - int trip, + int trip_index, struct thermal_cooling_device *cdev, unsigned long upper, unsigned long lower, unsigned int weight) @@ -635,12 +635,15 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, struct thermal_instance *pos; struct thermal_zone_device *pos1; struct thermal_cooling_device *pos2; + const struct thermal_trip *trip; bool upper_no_limit; int result; - if (trip >= tz->num_trips || trip < 0) + if (trip_index >= tz->num_trips || trip_index < 0) return -EINVAL; + trip = &tz->trips[trip_index]; + list_for_each_entry(pos1, &thermal_tz_list, node) { if (pos1 == tz) break; @@ -745,7 +748,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_bind_cooling_device); * thermal_zone_unbind_cooling_device() - unbind a cooling device from a * thermal zone. * @tz: pointer to a struct thermal_zone_device. - * @trip: indicates which trip point the cooling devices is + * @trip_index: indicates which trip point the cooling devices is * associated with in this thermal zone. * @cdev: pointer to a struct thermal_cooling_device. * @@ -756,13 +759,15 @@ EXPORT_SYMBOL_GPL(thermal_zone_bind_cooling_device); * Return: 0 on success, the proper error value otherwise. */ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, - int trip, + int trip_index, struct thermal_cooling_device *cdev) { struct thermal_instance *pos, *next; + const struct thermal_trip *trip; mutex_lock(&tz->lock); mutex_lock(&cdev->lock); + trip = &tz->trips[trip_index]; list_for_each_entry_safe(pos, next, &tz->thermal_instances, tz_node) { if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) { list_del(&pos->tz_node); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index de884bea28b66..024e82ebf5920 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -87,7 +87,7 @@ struct thermal_instance { char name[THERMAL_NAME_LENGTH]; struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; - int trip; + const struct thermal_trip *trip; bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ @@ -119,6 +119,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, void __thermal_zone_set_trips(struct thermal_zone_device *tz); int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); +int thermal_zone_trip_id(struct thermal_zone_device *tz, + const struct thermal_trip *trip); int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); /* sysfs I/F */ diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index 4d66372c96295..c1d0af73c85d6 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -42,14 +42,17 @@ int get_tz_trend(struct thermal_zone_device *tz, int trip_index) struct thermal_instance * get_thermal_instance(struct thermal_zone_device *tz, - struct thermal_cooling_device *cdev, int trip) + struct thermal_cooling_device *cdev, int trip_index) { struct thermal_instance *pos = NULL; struct thermal_instance *target_instance = NULL; + const struct thermal_trip *trip; mutex_lock(&tz->lock); mutex_lock(&cdev->lock); + trip = &tz->trips[trip_index]; + list_for_each_entry(pos, &tz->thermal_instances, tz_node) { if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) { target_instance = pos; diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 4e6a97db894e9..eef40d4f30639 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -943,7 +943,8 @@ trip_point_show(struct device *dev, struct device_attribute *attr, char *buf) instance = container_of(attr, struct thermal_instance, attr); - return sprintf(buf, "%d\n", instance->trip); + return sprintf(buf, "%d\n", + thermal_zone_trip_id(instance->tz, instance->trip)); } ssize_t diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index 597ac4144e331..afc9499128c29 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -17,9 +17,6 @@ int for_each_thermal_trip(struct thermal_zone_device *tz, lockdep_assert_held(&tz->lock); - if (!tz->trips) - return -ENODATA; - for (i = 0; i < tz->num_trips; i++) { ret = cb(&tz->trips[i], data); if (ret) @@ -175,3 +172,16 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, return 0; } + +int thermal_zone_trip_id(struct thermal_zone_device *tz, + const struct thermal_trip *trip) +{ + int i; + + for (i = 0; i < tz->num_trips; i++) { + if (&tz->trips[i] == trip) + return i; + } + + return -ENODATA; +} diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index cd36251ba1c02..2fbb9b597752b 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1947,10 +1947,6 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio rs485conf->flags & SER_RS485_RX_DURING_TX) imx_uart_start_rx(port); - if (port->rs485_rx_during_tx_gpio) - gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, - !!(rs485conf->flags & SER_RS485_RX_DURING_TX)); - return 0; } diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 6aeb821d9b1da..f75b8bceb8ca2 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -301,8 +301,8 @@ /* Misc definitions */ +#define SC16IS7XX_SPI_READ_BIT BIT(7) #define SC16IS7XX_FIFO_SIZE (64) -#define SC16IS7XX_REG_SHIFT 2 #define SC16IS7XX_GPIOS_PER_BANK 4 struct sc16is7xx_devtype { @@ -323,7 +323,8 @@ struct sc16is7xx_one_config { struct sc16is7xx_one { struct uart_port port; - u8 line; + struct regmap *regmap; + struct mutex efr_lock; /* EFR registers access */ struct kthread_work tx_work; struct kthread_work reg_work; struct kthread_delayed_work ms_work; @@ -334,7 +335,6 @@ struct sc16is7xx_one { struct sc16is7xx_port { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; struct clk *clk; #ifdef CONFIG_GPIOLIB struct gpio_chip gpio; @@ -344,7 +344,6 @@ struct sc16is7xx_port { unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; - struct mutex efr_lock; struct sc16is7xx_one p[]; }; @@ -362,48 +361,35 @@ static void sc16is7xx_stop_tx(struct uart_port *port); #define to_sc16is7xx_port(p,e) ((container_of((p), struct sc16is7xx_port, e))) #define to_sc16is7xx_one(p,e) ((container_of((p), struct sc16is7xx_one, e))) -static int sc16is7xx_line(struct uart_port *port) -{ - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - - return one->line; -} - static u8 sc16is7xx_port_read(struct uart_port *port, u8 reg) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int val = 0; - const u8 line = sc16is7xx_line(port); - regmap_read(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, &val); + regmap_read(one->regmap, reg, &val); return val; } static void sc16is7xx_port_write(struct uart_port *port, u8 reg, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regmap_write(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, val); + regmap_write(one->regmap, reg, val); } static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regcache_cache_bypass(s->regmap, true); - regmap_raw_read(s->regmap, addr, s->buf, rxlen); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen); } static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_THR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); /* * Don't send zero-length data, at least on SPI it confuses the chip @@ -412,32 +398,15 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) if (unlikely(!to_send)) return; - regcache_cache_bypass(s->regmap, true); - regmap_raw_write(s->regmap, addr, s->buf, to_send); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send); } static void sc16is7xx_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - - regmap_update_bits(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, - mask, val); -} - -static int sc16is7xx_alloc_line(void) -{ - int i; - - BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG); - - for (i = 0; i < SC16IS7XX_MAX_DEVS; i++) - if (!test_and_set_bit(i, &sc16is7xx_lines)) - break; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - return i; + regmap_update_bits(one->regmap, reg, mask, val); } static void sc16is7xx_power(struct uart_port *port, int on) @@ -479,7 +448,7 @@ static const struct sc16is7xx_devtype sc16is762_devtype = { static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: case SC16IS7XX_IIR_REG: case SC16IS7XX_LSR_REG: @@ -498,7 +467,7 @@ static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: return true; default: @@ -508,9 +477,14 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) return false; } +static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg) +{ + return reg == SC16IS7XX_RHR_REG; +} + static int sc16is7xx_set_baud(struct uart_port *port, int baud) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); u8 lcr; u8 prescaler = 0; unsigned long clk = port->uartclk, div = clk / 16 / baud; @@ -533,7 +507,7 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) * because the bulk of the interrupt processing is run as a workqueue * job in thread context. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); @@ -542,17 +516,17 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_B); /* Enable enhanced features */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, @@ -563,10 +537,10 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_A); /* Write the new divisor */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_DLH_REG, div / 256); sc16is7xx_port_write(port, SC16IS7XX_DLL_REG, div % 256); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); @@ -668,9 +642,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) } if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } @@ -696,13 +670,15 @@ static void sc16is7xx_handle_tx(struct uart_port *port) sc16is7xx_fifo_write(port, to_send); } - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); if (uart_circ_empty(xmit)) sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + else + sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) @@ -720,11 +696,10 @@ static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) { struct uart_port *port = &one->port; - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned long flags; unsigned int status, changed; - lockdep_assert_held_once(&s->efr_lock); + lockdep_assert_held_once(&one->efr_lock); status = sc16is7xx_get_hwmctrl(port); changed = status ^ one->old_mctrl; @@ -734,7 +709,7 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) one->old_mctrl = status; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if ((changed & TIOCM_RNG) && (status & TIOCM_RNG)) port->icount.rng++; if (changed & TIOCM_DSR) @@ -745,79 +720,82 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) uart_handle_cts_change(port, status & TIOCM_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { + bool rc = true; + unsigned int iir, rxlen; struct uart_port *port = &s->p[portno].port; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - do { - unsigned int iir, rxlen; - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - - iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); - if (iir & SC16IS7XX_IIR_NO_INT_BIT) - return false; - - iir &= SC16IS7XX_IIR_ID_MASK; - - switch (iir) { - case SC16IS7XX_IIR_RDI_SRC: - case SC16IS7XX_IIR_RLSE_SRC: - case SC16IS7XX_IIR_RTOI_SRC: - case SC16IS7XX_IIR_XOFFI_SRC: - rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); - - /* - * There is a silicon bug that makes the chip report a - * time-out interrupt but no data in the FIFO. This is - * described in errata section 18.1.4. - * - * When this happens, read one byte from the FIFO to - * clear the interrupt. - */ - if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) - rxlen = 1; - - if (rxlen) - sc16is7xx_handle_rx(port, rxlen, iir); - break; + mutex_lock(&one->efr_lock); + + iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); + if (iir & SC16IS7XX_IIR_NO_INT_BIT) { + rc = false; + goto out_port_irq; + } + + iir &= SC16IS7XX_IIR_ID_MASK; + + switch (iir) { + case SC16IS7XX_IIR_RDI_SRC: + case SC16IS7XX_IIR_RLSE_SRC: + case SC16IS7XX_IIR_RTOI_SRC: + case SC16IS7XX_IIR_XOFFI_SRC: + rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; + + if (rxlen) + sc16is7xx_handle_rx(port, rxlen, iir); + break; /* CTSRTS interrupt comes only when CTS goes inactive */ - case SC16IS7XX_IIR_CTSRTS_SRC: - case SC16IS7XX_IIR_MSI_SRC: - sc16is7xx_update_mlines(one); - break; - case SC16IS7XX_IIR_THRI_SRC: - sc16is7xx_handle_tx(port); - break; - default: - dev_err_ratelimited(port->dev, - "ttySC%i: Unexpected interrupt: %x", - port->line, iir); - break; - } - } while (0); - return true; + case SC16IS7XX_IIR_CTSRTS_SRC: + case SC16IS7XX_IIR_MSI_SRC: + sc16is7xx_update_mlines(one); + break; + case SC16IS7XX_IIR_THRI_SRC: + sc16is7xx_handle_tx(port); + break; + default: + dev_err_ratelimited(port->dev, + "ttySC%i: Unexpected interrupt: %x", + port->line, iir); + break; + } + +out_port_irq: + mutex_unlock(&one->efr_lock); + + return rc; } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) { - struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id; + bool keep_polling; - mutex_lock(&s->efr_lock); + struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id; - while (1) { - bool keep_polling = false; + do { int i; + keep_polling = false; + for (i = 0; i < s->devtype->nr_uart; ++i) keep_polling |= sc16is7xx_port_irq(s, i); - if (!keep_polling) - break; - } - - mutex_unlock(&s->efr_lock); + } while (keep_polling); return IRQ_HANDLED; } @@ -825,20 +803,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - unsigned long flags; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) msleep(port->rs485.delay_rts_before_send); - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_handle_tx(port); - mutex_unlock(&s->efr_lock); - - spin_lock_irqsave(&port->lock, flags); - sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + mutex_unlock(&one->efr_lock); } static void sc16is7xx_reconf_rs485(struct uart_port *port) @@ -849,14 +822,14 @@ static void sc16is7xx_reconf_rs485(struct uart_port *port) struct serial_rs485 *rs485 = &port->rs485; unsigned long irqflags; - spin_lock_irqsave(&port->lock, irqflags); + uart_port_lock_irqsave(port, &irqflags); if (rs485->flags & SER_RS485_ENABLED) { efcr |= SC16IS7XX_EFCR_AUTO_RS485_BIT; if (rs485->flags & SER_RS485_RTS_AFTER_SEND) efcr |= SC16IS7XX_EFCR_RTS_INVERT_BIT; } - spin_unlock_irqrestore(&port->lock, irqflags); + uart_port_unlock_irqrestore(port, irqflags); sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, mask, efcr); } @@ -867,10 +840,10 @@ static void sc16is7xx_reg_proc(struct kthread_work *ws) struct sc16is7xx_one_config config; unsigned long irqflags; - spin_lock_irqsave(&one->port.lock, irqflags); + uart_port_lock_irqsave(&one->port, &irqflags); config = one->config; memset(&one->config, 0, sizeof(one->config)); - spin_unlock_irqrestore(&one->port.lock, irqflags); + uart_port_unlock_irqrestore(&one->port, irqflags); if (config.flags & SC16IS7XX_RECONF_MD) { u8 mcr = 0; @@ -941,9 +914,9 @@ static void sc16is7xx_ms_proc(struct kthread_work *ws) struct sc16is7xx_port *s = dev_get_drvdata(one->port.dev); if (one->port.state) { - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_update_mlines(one); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); kthread_queue_delayed_work(&s->kworker, &one->ms_work, HZ); } @@ -976,18 +949,18 @@ static void sc16is7xx_throttle(struct uart_port *port) * value set in MCR register. Stop reading data from RX FIFO so the * AutoRTS feature will de-activate RTS output. */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_clear(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static void sc16is7xx_unthrottle(struct uart_port *port) { unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_set(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_tx_empty(struct uart_port *port) @@ -1027,7 +1000,6 @@ static void sc16is7xx_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lcr, flow = 0; int baud; @@ -1086,13 +1058,13 @@ static void sc16is7xx_set_termios(struct uart_port *port, port->ignore_status_mask |= SC16IS7XX_LSR_BRK_ERROR_MASK; /* As above, claim the mutex while accessing the EFR. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); /* Configure flow control */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_XON1_REG, termios->c_cc[VSTART]); sc16is7xx_port_write(port, SC16IS7XX_XOFF1_REG, termios->c_cc[VSTOP]); @@ -1111,12 +1083,12 @@ static void sc16is7xx_set_termios(struct uart_port *port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_FLOWCTRL_BITS, flow); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); /* Get baud rate generator configuration */ baud = uart_get_baud_rate(port, termios, old, @@ -1126,7 +1098,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Setup baudrate generator */ baud = sc16is7xx_set_baud(port, baud); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); /* Update timeout according to new baud rate */ uart_update_timeout(port, termios->c_cflag, baud); @@ -1134,7 +1106,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, if (UART_ENABLE_MS(port, termios->c_cflag)) sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termios, @@ -1162,7 +1134,6 @@ static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termi static int sc16is7xx_startup(struct uart_port *port) { struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned int val; unsigned long flags; @@ -1179,7 +1150,7 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); /* Enable write access to enhanced features and internal clock div */ sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, @@ -1197,7 +1168,7 @@ static int sc16is7xx_startup(struct uart_port *port) SC16IS7XX_TCR_RX_RESUME(24) | SC16IS7XX_TCR_RX_HALT(48)); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Now, initialize the UART */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_WORD_LEN_8); @@ -1221,9 +1192,9 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_IER_REG, val); /* Enable modem status polling */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return 0; } @@ -1425,7 +1396,8 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) /* * Configure ports designated to operate as modem control lines. */ -static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) +static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s, + struct regmap *regmap) { int i; int ret; @@ -1454,8 +1426,8 @@ static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) if (s->mctrl_mask) regmap_update_bits( - s->regmap, - SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, + regmap, + SC16IS7XX_IOCONTROL_REG, SC16IS7XX_IOCONTROL_MODEM_A_BIT | SC16IS7XX_IOCONTROL_MODEM_B_BIT, s->mctrl_mask); @@ -1470,7 +1442,7 @@ static const struct serial_rs485 sc16is7xx_rs485_supported = { static int sc16is7xx_probe(struct device *dev, const struct sc16is7xx_devtype *devtype, - struct regmap *regmap, int irq) + struct regmap *regmaps[], int irq) { unsigned long freq = 0, *pfreq = dev_get_platdata(dev); unsigned int val; @@ -1478,16 +1450,20 @@ static int sc16is7xx_probe(struct device *dev, int i, ret; struct sc16is7xx_port *s; - if (IS_ERR(regmap)) - return PTR_ERR(regmap); + for (i = 0; i < devtype->nr_uart; i++) + if (IS_ERR(regmaps[i])) + return PTR_ERR(regmaps[i]); /* * This device does not have an identification register that would * tell us if we are really connected to the correct device. * The best we can do is to check if communication is at all possible. + * + * Note: regmap[0] is used in the probe function to access registers + * common to all channels/ports, as it is guaranteed to be present on + * all variants. */ - ret = regmap_read(regmap, - SC16IS7XX_LSR_REG << SC16IS7XX_REG_SHIFT, &val); + ret = regmap_read(regmaps[0], SC16IS7XX_LSR_REG, &val); if (ret < 0) return -EPROBE_DEFER; @@ -1521,10 +1497,8 @@ static int sc16is7xx_probe(struct device *dev, return -EINVAL; } - s->regmap = regmap; s->devtype = devtype; dev_set_drvdata(dev, s); - mutex_init(&s->efr_lock); kthread_init_worker(&s->kworker); s->kworker_task = kthread_run(kthread_worker_fn, &s->kworker, @@ -1536,11 +1510,17 @@ static int sc16is7xx_probe(struct device *dev, sched_set_fifo(s->kworker_task); /* reset device, purging any pending irq / data */ - regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, - SC16IS7XX_IOCONTROL_SRESET_BIT); + regmap_write(regmaps[0], SC16IS7XX_IOCONTROL_REG, + SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { - s->p[i].line = i; + s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines, + SC16IS7XX_MAX_DEVS); + if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { + ret = -ERANGE; + goto out_ports; + } + /* Initialize port data */ s->p[i].port.dev = dev; s->p[i].port.irq = irq; @@ -1560,12 +1540,9 @@ static int sc16is7xx_probe(struct device *dev, s->p[i].port.rs485_supported = sc16is7xx_rs485_supported; s->p[i].port.ops = &sc16is7xx_ops; s->p[i].old_mctrl = 0; - s->p[i].port.line = sc16is7xx_alloc_line(); + s->p[i].regmap = regmaps[i]; - if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { - ret = -ENOMEM; - goto out_ports; - } + mutex_init(&s->p[i].efr_lock); ret = uart_get_rs485_mode(&s->p[i].port); if (ret) @@ -1582,20 +1559,25 @@ static int sc16is7xx_probe(struct device *dev, kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc); kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc); kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc); + /* Register port */ - uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + if (ret) + goto out_ports; + + set_bit(s->p[i].port.line, &sc16is7xx_lines); /* Enable EFR */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(regmaps[i], true); /* Enable write access to enhanced features */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(regmaps[i], false); /* Restore access to general registers */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, 0x00); @@ -1615,7 +1597,7 @@ static int sc16is7xx_probe(struct device *dev, s->p[u].irda_mode = true; } - ret = sc16is7xx_setup_mctrl_ports(s); + ret = sc16is7xx_setup_mctrl_ports(s, regmaps[0]); if (ret) goto out_ports; @@ -1650,10 +1632,9 @@ static int sc16is7xx_probe(struct device *dev, #endif out_ports: - for (i--; i >= 0; i--) { - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); - } + for (i = 0; i < devtype->nr_uart; i++) + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); kthread_stop(s->kworker_task); @@ -1675,8 +1656,8 @@ static void sc16is7xx_remove(struct device *dev) for (i = 0; i < s->devtype->nr_uart; i++) { kthread_cancel_delayed_work_sync(&s->p[i].ms_work); - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); sc16is7xx_power(&s->p[i].port, 0); } @@ -1698,19 +1679,42 @@ static const struct of_device_id __maybe_unused sc16is7xx_dt_ids[] = { MODULE_DEVICE_TABLE(of, sc16is7xx_dt_ids); static struct regmap_config regcfg = { - .reg_bits = 7, - .pad_bits = 1, + .reg_bits = 5, + .pad_bits = 3, .val_bits = 8, .cache_type = REGCACHE_RBTREE, .volatile_reg = sc16is7xx_regmap_volatile, .precious_reg = sc16is7xx_regmap_precious, + .writeable_noinc_reg = sc16is7xx_regmap_noinc, + .readable_noinc_reg = sc16is7xx_regmap_noinc, + .max_raw_read = SC16IS7XX_FIFO_SIZE, + .max_raw_write = SC16IS7XX_FIFO_SIZE, + .max_register = SC16IS7XX_EFCR_REG, }; +static const char *sc16is7xx_regmap_name(u8 port_id) +{ + switch (port_id) { + case 0: return "port0"; + case 1: return "port1"; + default: + WARN_ON(true); + return NULL; + } +} + +static unsigned int sc16is7xx_regmap_port_mask(unsigned int port_id) +{ + /* CH1,CH0 are at bits 2:1. */ + return port_id << 1; +} + #ifdef CONFIG_SERIAL_SC16IS7XX_SPI static int sc16is7xx_spi_probe(struct spi_device *spi) { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; int ret; /* Setup SPI bus */ @@ -1735,11 +1739,20 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) devtype = (struct sc16is7xx_devtype *)id_entry->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_spi(spi, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + /* + * If read_flag_mask is 0, the regmap code sets it to a default + * of 0x80. Since we specify our own mask, we must add the READ + * bit ourselves: + */ + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i) | + SC16IS7XX_SPI_READ_BIT; + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_spi(spi, ®cfg); + } - return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq); + return sc16is7xx_probe(&spi->dev, devtype, regmaps, spi->irq); } static void sc16is7xx_spi_remove(struct spi_device *spi) @@ -1778,7 +1791,8 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c) { const struct i2c_device_id *id = i2c_client_get_device_id(i2c); const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; if (i2c->dev.of_node) { devtype = device_get_match_data(&i2c->dev); @@ -1788,11 +1802,14 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c) devtype = (struct sc16is7xx_devtype *)id->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_i2c(i2c, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i); + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_i2c(i2c, ®cfg); + } - return sc16is7xx_probe(&i2c->dev, devtype, regmap, i2c->irq); + return sc16is7xx_probe(&i2c->dev, devtype, regmaps, i2c->irq); } static void sc16is7xx_i2c_remove(struct i2c_client *client) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 18b49b1439a58..083ea4de48f9a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1409,6 +1409,16 @@ static void uart_set_rs485_termination(struct uart_port *port, !!(rs485->flags & SER_RS485_TERMINATE_BUS)); } +static void uart_set_rs485_rx_during_tx(struct uart_port *port, + const struct serial_rs485 *rs485) +{ + if (!(rs485->flags & SER_RS485_ENABLED)) + return; + + gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, + !!(rs485->flags & SER_RS485_RX_DURING_TX)); +} + static int uart_rs485_config(struct uart_port *port) { struct serial_rs485 *rs485 = &port->rs485; @@ -1420,12 +1430,17 @@ static int uart_rs485_config(struct uart_port *port) uart_sanitize_serial_rs485(port, rs485); uart_set_rs485_termination(port, rs485); + uart_set_rs485_rx_during_tx(port, rs485); spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, NULL, rs485); spin_unlock_irqrestore(&port->lock, flags); - if (ret) + if (ret) { memset(rs485, 0, sizeof(*rs485)); + /* unset GPIOs */ + gpiod_set_value_cansleep(port->rs485_term_gpio, 0); + gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, 0); + } return ret; } @@ -1464,6 +1479,7 @@ static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port, return ret; uart_sanitize_serial_rs485(port, &rs485); uart_set_rs485_termination(port, &rs485); + uart_set_rs485_rx_during_tx(port, &rs485); spin_lock_irqsave(&port->lock, flags); ret = port->rs485_config(port, &tty->termios, &rs485); @@ -1475,8 +1491,14 @@ static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port, port->ops->set_mctrl(port, port->mctrl); } spin_unlock_irqrestore(&port->lock, flags); - if (ret) + if (ret) { + /* restore old GPIO settings */ + gpiod_set_value_cansleep(port->rs485_term_gpio, + !!(port->rs485.flags & SER_RS485_TERMINATE_BUS)); + gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, + !!(port->rs485.flags & SER_RS485_RX_DURING_TX)); return ret; + } if (copy_to_user(rs485_user, &port->rs485, sizeof(port->rs485))) return -EFAULT; @@ -3572,9 +3594,10 @@ int uart_get_rs485_mode(struct uart_port *port) { struct serial_rs485 *rs485conf = &port->rs485; struct device *dev = port->dev; + enum gpiod_flags dflags; + struct gpio_desc *desc; u32 rs485_delay[2]; int ret; - int rx_during_tx_gpio_flag; if (!(port->rs485_supported.flags & SER_RS485_ENABLED)) return 0; @@ -3616,26 +3639,21 @@ int uart_get_rs485_mode(struct uart_port *port) * bus participants enable it, no communication is possible at all. * Works fine for short cables and users may enable for longer cables. */ - port->rs485_term_gpio = devm_gpiod_get_optional(dev, "rs485-term", - GPIOD_OUT_LOW); - if (IS_ERR(port->rs485_term_gpio)) { - ret = PTR_ERR(port->rs485_term_gpio); - port->rs485_term_gpio = NULL; - return dev_err_probe(dev, ret, "Cannot get rs485-term-gpios\n"); - } + desc = devm_gpiod_get_optional(dev, "rs485-term", GPIOD_OUT_LOW); + if (IS_ERR(desc)) + return dev_err_probe(dev, PTR_ERR(desc), "Cannot get rs485-term-gpios\n"); + port->rs485_term_gpio = desc; if (port->rs485_term_gpio) port->rs485_supported.flags |= SER_RS485_TERMINATE_BUS; - rx_during_tx_gpio_flag = (rs485conf->flags & SER_RS485_RX_DURING_TX) ? - GPIOD_OUT_HIGH : GPIOD_OUT_LOW; - port->rs485_rx_during_tx_gpio = devm_gpiod_get_optional(dev, - "rs485-rx-during-tx", - rx_during_tx_gpio_flag); - if (IS_ERR(port->rs485_rx_during_tx_gpio)) { - ret = PTR_ERR(port->rs485_rx_during_tx_gpio); - port->rs485_rx_during_tx_gpio = NULL; - return dev_err_probe(dev, ret, "Cannot get rs485-rx-during-tx-gpios\n"); - } + dflags = (rs485conf->flags & SER_RS485_RX_DURING_TX) ? + GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + desc = devm_gpiod_get_optional(dev, "rs485-rx-during-tx", dflags); + if (IS_ERR(desc)) + return dev_err_probe(dev, PTR_ERR(desc), "Cannot get rs485-rx-during-tx-gpios\n"); + port->rs485_rx_during_tx_gpio = desc; + if (port->rs485_rx_during_tx_gpio) + port->rs485_supported.flags |= SER_RS485_RX_DURING_TX; return 0; } diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 5e9cf0c48813d..b6f4f436a5653 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -226,12 +226,6 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter stm32_usart_clr_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit)); - if (port->rs485_rx_during_tx_gpio) - gpiod_set_value_cansleep(port->rs485_rx_during_tx_gpio, - !!(rs485conf->flags & SER_RS485_RX_DURING_TX)); - else - rs485conf->flags |= SER_RS485_RX_DURING_TX; - if (rs485conf->flags & SER_RS485_ENABLED) { cr1 = readl_relaxed(port->membase + ofs->cr1); cr3 = readl_relaxed(port->membase + ofs->cr3); @@ -256,6 +250,8 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter writel_relaxed(cr3, port->membase + ofs->cr3); writel_relaxed(cr1, port->membase + ofs->cr1); + + rs485conf->flags |= SER_RS485_RX_DURING_TX; } else { stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DEM | USART_CR3_DEP); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0971ae37f2a71..44e0437bd19d9 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8810,12 +8810,9 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) out: pm_runtime_put_sync(hba->dev); - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. - */ + if (ret) - ufshcd_hba_exit(hba); + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5219182e52e1a..2df2e9ee130d8 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -474,6 +474,14 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, continue; } + /* Don't expose silly rename entries to userspace. */ + if (nlen > 6 && + dire->u.name[0] == '.' && + ctx->actor != afs_lookup_filldir && + ctx->actor != afs_lookup_one_filldir && + memcmp(dire->u.name, ".__afs", 6) == 0) + continue; + /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 91fe57e87583c..b89b558b15926 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1245,7 +1245,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 bytes_left, end; u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT); - if (WARN_ON(start != aligned_start)) { + /* Adjust the range to be aligned to 512B sectors if necessary. */ + if (start != aligned_start) { len -= aligned_start - start; len = round_down(len, 1 << SECTOR_SHIFT); start = aligned_start; @@ -4138,6 +4139,42 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, return 0; } +static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + if (ffe_ctl->for_treelog) { + spin_lock(&fs_info->treelog_bg_lock); + if (fs_info->treelog_bg) + ffe_ctl->hint_byte = fs_info->treelog_bg; + spin_unlock(&fs_info->treelog_bg_lock); + } else if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + struct btrfs_block_group *block_group; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { + /* + * No lock is OK here because avail is monotinically + * decreasing, and this is just a hint. + */ + u64 avail = block_group->zone_capacity - block_group->alloc_offset; + + if (block_group_bits(block_group, ffe_ctl->flags) && + avail >= ffe_ctl->num_bytes) { + ffe_ctl->hint_byte = block_group->start; + break; + } + } + spin_unlock(&fs_info->zone_active_bgs_lock); + } + + return 0; +} + static int prepare_allocation(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4148,19 +4185,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, return prepare_allocation_clustered(fs_info, ffe_ctl, space_info, ins); case BTRFS_EXTENT_ALLOC_ZONED: - if (ffe_ctl->for_treelog) { - spin_lock(&fs_info->treelog_bg_lock); - if (fs_info->treelog_bg) - ffe_ctl->hint_byte = fs_info->treelog_bg; - spin_unlock(&fs_info->treelog_bg_lock); - } - if (ffe_ctl->for_data_reloc) { - spin_lock(&fs_info->relocation_bg_lock); - if (fs_info->data_reloc_bg) - ffe_ctl->hint_byte = fs_info->data_reloc_bg; - spin_unlock(&fs_info->relocation_bg_lock); - } - return 0; + return prepare_allocation_zoned(fs_info, ffe_ctl); default: BUG(); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7bf5c2c1a54d8..f250e2083c7eb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4447,6 +4447,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) u64 root_flags; int ret; + down_write(&fs_info->subvol_sem); + /* * Don't allow to delete a subvolume with send in progress. This is * inside the inode lock so the error handling that has to drop the bit @@ -4458,25 +4460,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) btrfs_warn(fs_info, "attempt to delete subvolume %llu during send", dest->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } if (atomic_read(&dest->nr_swapfiles)) { spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", root->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - down_write(&fs_info->subvol_sem); - ret = may_destroy_subvol(dest); if (ret) - goto out_up_write; + goto out_undead; btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -4486,7 +4488,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) - goto out_up_write; + goto out_undead; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -4552,15 +4554,17 @@ out_end_trans: inode->i_flags |= S_DEAD; out_release: btrfs_subvolume_release_metadata(root, &block_rsv); -out_up_write: - up_write(&fs_info->subvol_sem); +out_undead: if (ret) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - } else { + } +out_up_write: + up_write(&fs_info->subvol_sem); + if (!ret) { d_invalidate(dentry); btrfs_prune_dentries(dest); ASSERT(dest->send_in_progress == 0); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e611b3a3ad388..908215928d6a6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, return -EOPNOTSUPP; } + if (btrfs_root_refs(&root->root_item) == 0) + return -ENOENT; + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; @@ -2608,6 +2611,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EFAULT; goto out; } + if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { + ret = -EOPNOTSUPP; + goto out; + } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index e646662e61c6b..1ea5bfb8876e4 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -886,8 +886,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, out_unlock: spin_unlock(&fs_info->ref_verify_lock); out: - if (ret) + if (ret) { + btrfs_free_ref_cache(fs_info); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } return ret; } @@ -1018,8 +1020,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) } } if (ret) { - btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); } btrfs_free_path(path); return ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4445a52a07076..1e3ff87d04470 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1099,12 +1099,22 @@ out: static void scrub_read_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; + struct bio_vec *bvec; + int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); + int num_sectors; + u32 bio_size = 0; + int i; + + ASSERT(sector_nr < stripe->nr_sectors); + bio_for_each_bvec_all(bvec, &bbio->bio, i) + bio_size += bvec->bv_len; + num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits; if (bbio->bio.bi_status) { - bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors); - bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors); + bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors); + bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors); } else { - bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) { @@ -1640,6 +1650,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; + unsigned int nr_sectors = min_t(u64, BTRFS_STRIPE_LEN, stripe->bg->start + + stripe->bg->length - stripe->logical) >> + fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1649,14 +1662,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, scrub_read_endio, stripe); - /* Read the whole stripe. */ bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; - for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) { + /* Read the whole range inside the chunk boundary. */ + for (unsigned int cur = 0; cur < nr_sectors; cur++) { + struct page *page = scrub_stripe_get_page(stripe, cur); + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur); int ret; - ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0); + ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); /* We should have allocated enough bio vectors. */ - ASSERT(ret == PAGE_SIZE); + ASSERT(ret == fs_info->sectorsize); } atomic_inc(&stripe->pending_io); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index b1d1ac25237b7..c9198723e4cb7 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1760,6 +1760,10 @@ static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj, unsigned long long limit; limit = memparse(buf, &endptr); + /* There could be trailing '\n', also catch any typos after the value. */ + endptr = skip_spaces(endptr); + if (*endptr != 0) + return -EINVAL; WRITE_ONCE(device->scrub_speed_max, limit); return len; } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ab08a0b013112..cc6bc5985120d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1417,7 +1417,7 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", - ptr, inline_type, end); + ptr, btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 09bc325d075dc..41a8cdce5d9f7 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1975,6 +1975,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) map = block_group->physical_map; + spin_lock(&fs_info->zone_active_bgs_lock); spin_lock(&block_group->lock); if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { ret = true; @@ -1987,7 +1988,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - spin_lock(&fs_info->zone_active_bgs_lock); for (i = 0; i < map->num_stripes; i++) { struct btrfs_zoned_device_info *zinfo; int reserved = 0; @@ -2007,20 +2007,17 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) */ if (atomic_read(&zinfo->active_zones_left) <= reserved) { ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!is_data) zinfo->reserved_active_zones--; } - spin_unlock(&fs_info->zone_active_bgs_lock); /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); @@ -2028,8 +2025,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* For the active block group list */ btrfs_get_block_group(block_group); - - spin_lock(&fs_info->zone_active_bgs_lock); list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); spin_unlock(&fs_info->zone_active_bgs_lock); @@ -2037,6 +2032,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) out_unlock: spin_unlock(&block_group->lock); + spin_unlock(&fs_info->zone_active_bgs_lock); return ret; } diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index f7bc22e74db27..32dbd1a828d01 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1805,8 +1805,8 @@ static int dlm_tcp_bind(struct socket *sock) memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); - result = sock->ops->bind(sock, (struct sockaddr *)&src_addr, - addr_len); + result = kernel_bind(sock, (struct sockaddr *)&src_addr, + addr_len); if (result < 0) { /* This *may* not indicate a critical error */ log_print("could not bind for connect: %d", result); @@ -1818,7 +1818,7 @@ static int dlm_tcp_bind(struct socket *sock) static int dlm_tcp_connect(struct connection *con, struct socket *sock, struct sockaddr *addr, int addr_len) { - return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK); + return kernel_connect(sock, addr, addr_len, O_NONBLOCK); } static int dlm_tcp_listen_validate(void) @@ -1850,8 +1850,8 @@ static int dlm_tcp_listen_bind(struct socket *sock) /* Bind to our port */ make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); - return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0], - addr_len); + return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1876,12 +1876,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock, int ret; /* - * Make sock->ops->connect() function return in specified time, + * Make kernel_connect() function return in specified time, * since O_NONBLOCK argument in connect() function does not work here, * then, we should restore the default value of this attribute. */ sock_set_sndtimeo(sock->sk, 5); - ret = sock->ops->connect(sock, addr, addr_len, 0); + ret = kernel_connect(sock, addr, addr_len, 0); sock_set_sndtimeo(sock->sk, 0); return ret; } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 8d1f86487d6e9..d36b3963c0bf3 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -122,11 +122,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, } static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, - void *inpage, unsigned int *inputmargin, int *maptype, - bool may_inplace) + void *inpage, void *out, unsigned int *inputmargin, + int *maptype, bool may_inplace) { struct z_erofs_decompress_req *rq = ctx->rq; - unsigned int omargin, total, i, j; + unsigned int omargin, total, i; struct page **in; void *src, *tmp; @@ -136,12 +136,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < ctx->inpages; ++i) { - DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) - if (rq->out[j] == rq->in[i]) - goto docopy; - } + for (i = 0; i < ctx->inpages; ++i) + if (rq->out[ctx->outpages - ctx->inpages + i] != + rq->in[i]) + goto docopy; + kunmap_local(inpage); + *maptype = 3; + return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); } if (ctx->inpages <= 1) { @@ -149,7 +150,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, return inpage; } kunmap_local(inpage); - might_sleep(); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); @@ -205,12 +205,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, } static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, - u8 *out) + u8 *dst) { struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; - u8 *headpage, *src; + u8 *out, *headpage, *src; int ret, maptype; DBG_BUGON(*rq->in == NULL); @@ -231,11 +231,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } inputmargin = rq->pageofs_in; - src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, + src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); + out = dst + rq->pageofs_out; /* legacy format could compress extra data in a pcluster. */ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, @@ -266,7 +267,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); - } else { + } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; } @@ -309,7 +310,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } dstmap_out: - ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); + ret = z_erofs_lz4_decompress_mem(&ctx, dst); if (!dst_maptype) kunmap_local(dst); else if (dst_maptype == 2) diff --git a/fs/exec.c b/fs/exec.c index 6518e33ea813c..2f2b0acec4f01 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1410,6 +1410,9 @@ int begin_new_exec(struct linux_binprm * bprm) out_unlock: up_write(&me->signal->exec_update_lock); + if (!bprm->cred) + mutex_unlock(&me->signal->cred_guard_mutex); + out: return retval; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d70f7a06bab47..9a4b73485ded8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6887,11 +6887,16 @@ __acquires(bitlock) static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ext4_group_t grp) { - if (grp < ext4_get_groups_count(sb)) - return EXT4_CLUSTERS_PER_GROUP(sb) - 1; - return (ext4_blocks_count(EXT4_SB(sb)->s_es) - - ext4_group_first_block_no(sb, grp) - 1) >> - EXT4_CLUSTER_BITS(sb); + unsigned long nr_clusters_in_group; + + if (grp < (ext4_get_groups_count(sb) - 1)) + nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb); + else + nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp)) + >> EXT4_CLUSTER_BITS(sb); + + return nr_clusters_in_group - 1; } static bool ext4_trim_interrupted(void) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index d645f8b302a27..9397ed39b0b4e 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache); void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where) { - unsigned int debug_id = cache->debug_id; + unsigned int debug_id; bool zero; int ref; if (IS_ERR_OR_NULL(cache)) return; + debug_id = cache->debug_id; zero = __refcount_dec_and_test(&cache->ref, &ref); trace_fscache_cache(debug_id, ref - 1, where); diff --git a/fs/ioctl.c b/fs/ioctl.c index f5fd99d6b0d4e..76cf22ac97d76 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -920,8 +920,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, if (!f.file) return -EBADF; - /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(f.file, cmd, arg); + error = security_file_ioctl_compat(f.file, cmd, arg); if (error) goto out; diff --git a/fs/namei.c b/fs/namei.c index 94565bd7e73f6..e728ba085ebee 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3021,20 +3021,14 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) p = d_ancestor(p2, p1); if (p) { inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p1->d_inode, I_MUTEX_CHILD); + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } p = d_ancestor(p1, p2); - if (p) { - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_CHILD); - return p; - } - - lock_two_inodes(p1->d_inode, p2->d_inode, - I_MUTEX_PARENT, I_MUTEX_PARENT2); - return NULL; + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + return p; } /* @@ -4733,11 +4727,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. - * That's where 4.4 screws up. Current fix: serialization on + * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source. + * c) we may have to lock up to _four_ objects - parents and victim (if it exists), + * and source (if it's a non-directory or a subdirectory that moves to + * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4769,6 +4764,7 @@ int vfs_rename(struct renamedata *rd) bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; + bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; @@ -4822,15 +4818,32 @@ int vfs_rename(struct renamedata *rd) take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* - * Lock all moved children. Moved directories may need to change parent - * pointer so they need the lock to prevent against concurrent - * directory changes moving parent pointer. For regular files we've - * historically always done this. The lockdep locking subclasses are - * somewhat arbitrary but RENAME_EXCHANGE in particular can swap - * regular files and directories so it's difficult to tell which - * subclasses to use. + * Lock children. + * The source subdirectory needs to be locked on cross-directory + * rename or cross-directory exchange since its parent changes. + * The target subdirectory needs to be locked on cross-directory + * exchange due to parent change and on any rename due to becoming + * a victim. + * Non-directories need locking in all cases (for NFS reasons); + * they get locked after any subdirectories (in inode address order). + * + * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. + * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ - lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); + lock_old_subdir = new_dir != old_dir; + lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); + if (is_dir) { + if (lock_old_subdir) + inode_lock_nested(source, I_MUTEX_CHILD); + if (target && (!new_is_dir || lock_new_subdir)) + inode_lock(target); + } else if (new_is_dir) { + if (lock_new_subdir) + inode_lock_nested(target, I_MUTEX_CHILD); + inode_lock(source); + } else { + lock_two_nondirectories(source, target); + } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) @@ -4878,8 +4891,9 @@ int vfs_rename(struct renamedata *rd) d_exchange(old_dentry, new_dentry); } out: - inode_unlock(source); - if (target) + if (!is_dir || lock_old_subdir) + inode_unlock(source); + if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 529b3ed3b3177..f4cccbf664ceb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7890,14 +7890,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = file_inode(nf->nf_file); @@ -7913,7 +7915,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7923,10 +7926,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7962,10 +7963,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { diff --git a/fs/pipe.c b/fs/pipe.c index 139190165a1c2..a234035cc375d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -437,12 +437,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) { + if (pipe_has_watch_queue(pipe)) { ret = -EXDEV; goto out; } -#endif /* * If it wasn't empty we try to merge new data into @@ -1307,6 +1305,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) pipe->tail = tail; pipe->head = head; + if (!pipe_has_watch_queue(pipe)) { + pipe->max_usage = nr_slots; + pipe->nr_accounted = nr_slots; + } + spin_unlock_irq(&pipe->rd_wait.lock); /* This might have made more room for writers */ @@ -1324,10 +1327,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg) unsigned int nr_slots, size; long ret = 0; -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) + if (pipe_has_watch_queue(pipe)) return -EBUSY; -#endif size = round_pipe_size(arg); nr_slots = size >> PAGE_SHIFT; @@ -1360,8 +1361,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg) if (ret < 0) goto out_revert_acct; - pipe->max_usage = nr_slots; - pipe->nr_accounted = nr_slots; return pipe->max_usage * PAGE_SIZE; out_revert_acct: @@ -1379,10 +1378,8 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) if (file->f_op != &pipefifo_fops || !pipe) return NULL; -#ifdef CONFIG_WATCH_QUEUE - if (for_splice && pipe->watch_queue) + if (for_splice && pipe_has_watch_queue(pipe)) return NULL; -#endif return pipe; } diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index a2584ad8808a9..3230ed7eaddec 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -138,6 +138,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) { struct TCP_Server_Info *server = chan->server; + if (!server) { + seq_printf(m, "\n\n\t\tChannel: %d DISABLED", i+1); + return; + } + seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ec1e5e20a36be..5e32c79f03a74 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -659,6 +659,7 @@ struct TCP_Server_Info { bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; + bool terminate; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ @@ -1060,6 +1061,7 @@ struct cifs_ses { spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 +#define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c858feaf4f926..c00f844205590 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -132,6 +132,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); + void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); @@ -622,7 +623,7 @@ bool is_server_using_iface(struct TCP_Server_Info *server, bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server); void @@ -646,6 +647,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, bool cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); +void +cifs_disable_secondary_channels(struct cifs_ses *ses); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); int diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index b82f60d6f47ee..0ed6eb915c6ab 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -128,6 +128,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) */ rc = SMB3_request_interfaces(0, tcon, false); if (rc) { + if (rc == -EOPNOTSUPP) + return; + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); } @@ -169,8 +172,12 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) + continue; + spin_lock(&ses->chans[i].server->srv_lock); - ses->chans[i].server->tcpStatus = CifsNeedReconnect; + if (ses->chans[i].server->tcpStatus != CifsExiting) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; spin_unlock(&ses->chans[i].server->srv_lock); } spin_unlock(&ses->chan_lock); @@ -205,6 +212,18 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, /* If server is a channel, select the primary channel */ pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; + /* + * if the server has been marked for termination, there is a + * chance that the remaining channels all need reconnect. To be + * on the safer side, mark the session and trees for reconnect + * for this scenario. This might cause a few redundant session + * setup and tree connect requests, but it is better than not doing + * a tree connect when needed, and all following requests failing + */ + if (server->terminate) { + mark_smb_session = true; + server = pserver; + } spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { @@ -245,6 +264,8 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; spin_unlock(&tcon->tc_lock); + + cancel_delayed_work(&tcon->query_interfaces); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; @@ -1595,10 +1616,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); - /* For secondary channels, we pick up ref-count on the primary server */ - if (SERVER_IS_CHAN(server)) - cifs_put_tcp_session(server->primary_server, from_reconnect); - cancel_delayed_work_sync(&server->echo); if (from_reconnect) @@ -1612,6 +1629,10 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) else cancel_delayed_work_sync(&server->reconnect); + /* For secondary channels, we pick up ref-count on the primary server */ + if (SERVER_IS_CHAN(server)) + cifs_put_tcp_session(server->primary_server, from_reconnect); + spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 80050e36f0451..62596299a3964 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -69,7 +69,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) /* channel helper functions. assumed that chan_lock is held by caller. */ -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server) { @@ -85,14 +85,17 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); WARN_ON(1); - return 0; + return CIFS_INVAL_CHAN_INDEX; } void cifs_chan_set_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + int chan_index = cifs_ses_get_chan_index(ses, server); + + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = true; } @@ -102,6 +105,8 @@ cifs_chan_clear_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = false; } @@ -111,6 +116,8 @@ cifs_chan_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return CIFS_CHAN_IN_RECONNECT(ses, chan_index); } @@ -120,6 +127,8 @@ cifs_chan_set_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; set_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n", @@ -131,6 +140,8 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; clear_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n", @@ -142,6 +153,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } @@ -151,6 +164,8 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return ses->chans[chan_index].iface && ses->chans[chan_index].iface->is_active; @@ -275,6 +290,64 @@ int cifs_try_adding_channels(struct cifs_ses *ses) return new_chan_count - old_chan_count; } +/* + * called when multichannel is disabled by the server. + * this always gets called from smb2_reconnect + * and cannot get called in parallel threads. + */ +void +cifs_disable_secondary_channels(struct cifs_ses *ses) +{ + int i, chan_count; + struct TCP_Server_Info *server; + struct cifs_server_iface *iface; + + spin_lock(&ses->chan_lock); + chan_count = ses->chan_count; + if (chan_count == 1) + goto done; + + ses->chan_count = 1; + + /* for all secondary channels reset the need reconnect bit */ + ses->chans_need_reconnect &= 1; + + for (i = 1; i < chan_count; i++) { + iface = ses->chans[i].iface; + server = ses->chans[i].server; + + /* + * remove these references first, since we need to unlock + * the chan_lock here, since iface_lock is a higher lock + */ + ses->chans[i].iface = NULL; + ses->chans[i].server = NULL; + spin_unlock(&ses->chan_lock); + + if (iface) { + spin_lock(&ses->iface_lock); + kref_put(&iface->refcount, release_iface); + iface->num_channels--; + if (iface->weight_fulfilled) + iface->weight_fulfilled--; + spin_unlock(&ses->iface_lock); + } + + if (server) { + if (!server->terminate) { + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + } + cifs_put_tcp_session(server, false); + } + + spin_lock(&ses->chan_lock); + } + +done: + spin_unlock(&ses->chan_lock); +} + /* * update the iface for the channel if necessary. * will return 0 when iface is updated, 1 if removed, 2 otherwise @@ -293,7 +366,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); - if (!chan_index) { + if (chan_index == CIFS_INVAL_CHAN_INDEX) { spin_unlock(&ses->chan_lock); return 0; } @@ -403,6 +476,11 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + return 0; + } + ses->chans[chan_index].iface = iface; /* No iface is found. if secondary chan, drop connection */ @@ -569,14 +647,10 @@ cifs_ses_add_channel(struct cifs_ses *ses, out: if (rc && chan->server) { - /* - * we should avoid race with these delayed works before we - * remove this channel - */ - cancel_delayed_work_sync(&chan->server->echo); - cancel_delayed_work_sync(&chan->server->reconnect); + cifs_put_tcp_session(chan->server, 0); spin_lock(&ses->chan_lock); + /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; @@ -586,8 +660,6 @@ out: */ WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); - - cifs_put_tcp_session(chan->server, 0); } kfree(ctx->UNC); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0604696f59c12..e33ed0fbc318e 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -614,7 +614,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, "multichannel not available\n" "Empty network interface list returned by server %s\n", ses->server->hostname); - rc = -EINVAL; + rc = -EOPNOTSUPP; + ses->iface_last_update = jiffies; goto out; } @@ -712,7 +713,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, ses->iface_count++; spin_unlock(&ses->iface_lock); - ses->iface_last_update = jiffies; next_iface: nb_iface++; next = le32_to_cpu(p->Next); @@ -734,11 +734,7 @@ next_iface: if ((bytes_left > 8) || p->Next) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); - - if (!ses->iface_count) { - rc = -EINVAL; - goto out; - } + ses->iface_last_update = jiffies; out: /* diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 5276992e36478..f5006aa97f5b3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -156,13 +156,64 @@ out: return; } +/* helper function for code reuse */ +static int +cifs_chan_skip_or_disable(struct cifs_ses *ses, + struct TCP_Server_Info *server, + bool from_reconnect) +{ + struct TCP_Server_Info *pserver; + unsigned int chan_index; + + if (SERVER_IS_CHAN(server)) { + cifs_dbg(VFS, + "server %s does not support multichannel anymore. Skip secondary channel\n", + ses->server->hostname); + + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + goto skip_terminate; + } + + ses->chans[chan_index].server = NULL; + spin_unlock(&ses->chan_lock); + + /* + * the above reference of server by channel + * needs to be dropped without holding chan_lock + * as cifs_put_tcp_session takes a higher lock + * i.e. cifs_tcp_ses_lock + */ + cifs_put_tcp_session(server, from_reconnect); + + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + + /* mark primary server as needing reconnect */ + pserver = server->primary_server; + cifs_signal_cifsd_for_reconnect(pserver, false); +skip_terminate: + return -EHOSTDOWN; + } + + cifs_server_dbg(VFS, + "server does not support multichannel anymore. Disable all other channels\n"); + cifs_disable_secondary_channels(ses); + + + return 0; +} + static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, - struct TCP_Server_Info *server) + struct TCP_Server_Info *server, bool from_reconnect) { int rc = 0; struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; + int xid; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -223,6 +274,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, return -EAGAIN; } } + + /* if server is marked for termination, cifsd will cleanup */ + if (server->terminate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } spin_unlock(&server->srv_lock); again: @@ -241,12 +298,24 @@ again: tcon->need_reconnect); mutex_lock(&ses->session_mutex); + /* + * if this is called by delayed work, and the channel has been disabled + * in parallel, the delayed work can continue to execute in parallel + * there's a chance that this channel may not exist anymore + */ + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } + /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ - spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); mutex_unlock(&ses->session_mutex); @@ -283,6 +352,20 @@ again: rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { + /* + * if server stopped supporting multichannel + * and the first channel reconnected, disable all the others. + */ + if (ses->chan_count > 1 && + !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + rc = cifs_chan_skip_or_disable(ses, server, + from_reconnect); + if (rc) { + mutex_unlock(&ses->session_mutex); + goto out; + } + } + rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); @@ -307,15 +390,54 @@ skip_sess_setup: tcon->need_reopen_files = true; rc = cifs_tree_connect(0, tcon, nls_codepage); - mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ... */ + mutex_unlock(&ses->session_mutex); cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + mutex_unlock(&ses->session_mutex); + + /* + * query server network interfaces, in case they change + */ + xid = get_xid(); + rc = SMB3_request_interfaces(xid, tcon, false); + free_xid(xid); + + if (rc == -EOPNOTSUPP) { + /* + * some servers like Azure SMB server do not advertise + * that multichannel has been disabled with server + * capabilities, rather return STATUS_NOT_IMPLEMENTED. + * treat this as server not supporting multichannel + */ + + rc = cifs_chan_skip_or_disable(ses, server, + from_reconnect); + goto skip_add_channels; + } else if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + + if (ses->chan_max > ses->chan_count && + ses->iface_count && + !SERVER_IS_CHAN(server)) { + if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); + + cifs_try_adding_channels(ses); + } + } else { + mutex_unlock(&ses->session_mutex); + } +skip_add_channels: + if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); @@ -409,7 +531,7 @@ static int smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, { int rc; - rc = smb2_reconnect(smb2_command, tcon, server); + rc = smb2_reconnect(smb2_command, tcon, server, false); if (rc) return rc; @@ -2185,7 +2307,7 @@ int smb2_parse_contexts(struct TCP_Server_Info *server, noff = le16_to_cpu(cc->NameOffset); nlen = le16_to_cpu(cc->NameLength); - if (noff + nlen >= doff) + if (noff + nlen > doff) return -EINVAL; name = (char *)cc + noff; @@ -3829,12 +3951,28 @@ void smb2_reconnect_server(struct work_struct *work) int rc; bool resched = false; + /* first check if ref count has reached 0, if not inc ref count */ + spin_lock(&cifs_tcp_ses_lock); + if (!server->srv_count) { + spin_unlock(&cifs_tcp_ses_lock); + return; + } + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + /* If server is a channel, select the primary channel */ pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ mutex_lock(&pserver->reconnect_mutex); + /* if the server is marked for termination, drop the ref count here */ + if (server->terminate) { + cifs_put_tcp_session(server, true); + mutex_unlock(&pserver->reconnect_mutex); + return; + } + INIT_LIST_HEAD(&tmp_list); INIT_LIST_HEAD(&tmp_ses_list); cifs_dbg(FYI, "Reconnecting tcons and channels\n"); @@ -3879,17 +4017,10 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } - /* - * Get the reference to server struct to be sure that the last call of - * cifs_put_tcon() in the loop below won't release the server pointer. - */ - if (tcon_exist || ses_exist) - server->srv_count++; - spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { - rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server); + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true); if (!rc) cifs_reopen_persistent_handles(tcon); else @@ -3922,7 +4053,7 @@ void smb2_reconnect_server(struct work_struct *work) /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { tcon->ses = ses; - rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server); + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true); if (rc) resched = true; list_del_init(&ses->rlist); @@ -3937,8 +4068,7 @@ done: mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist || ses_exist) - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, true); } int diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index a136fc4cc2b5f..5a3ca62d2f07f 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -413,7 +413,13 @@ generate_smb3signingkey(struct cifs_ses *ses, ses->ses_status == SES_GOOD); chan_index = cifs_ses_get_chan_index(ses, server); - /* TODO: introduce ref counting for channels when the can be freed */ + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + + return -EINVAL; + } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index d553b7a54621b..4f717ad7c21b4 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1023,7 +1023,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { server = ses->chans[i].server; - if (!server) + if (!server || server->terminate) continue; /* diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 7977827c65410..09e1e7771592f 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -284,6 +284,7 @@ int ksmbd_conn_handler_loop(void *p) goto out; conn->last_active = jiffies; + set_freezable(); while (ksmbd_conn_alive(conn)) { if (try_to_freeze()) continue; diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index b7521e41402e0..0ebf91ffa2361 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -304,7 +304,8 @@ enum ksmbd_event { KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, - KSMBD_EVENT_MAX + __KSMBD_EVENT_MAX, + KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 }; /* diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index e0eb7cb2a5258..53dfaac425c68 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -105,7 +105,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = le16_to_cpu(lctx->epoch); + lease->epoch = le16_to_cpu(lctx->epoch) + 1; INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -546,6 +546,7 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, atomic_read(&ci->sop_count)) == 1) { if (lease->state != SMB2_LEASE_NONE_LE && lease->state == (lctx->req_state & lease->state)) { + lease->epoch++; lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) @@ -556,13 +557,17 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, atomic_read(&ci->sop_count)) > 1) { if (lctx->req_state == (SMB2_LEASE_READ_CACHING_LE | - SMB2_LEASE_HANDLE_CACHING_LE)) + SMB2_LEASE_HANDLE_CACHING_LE)) { + lease->epoch++; lease->state = lctx->req_state; + } } if (lctx->req_state && lease->state == - SMB2_LEASE_NONE_LE) + SMB2_LEASE_NONE_LE) { + lease->epoch++; lease_none_upgrade(opinfo, lctx->req_state); + } } read_lock(&ci->m_lock); } @@ -1035,7 +1040,8 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; - lease2->epoch = lease1->epoch++; + lease2->epoch = lease1->epoch; + lease2->version = lease1->version; } static int add_lease_global_list(struct oplock_info *opinfo) @@ -1453,7 +1459,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; - buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); + buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 297ed5c5ce8df..6ddfe3fef55f3 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2323,11 +2323,12 @@ out: * @eabuf: set info command buffer * @buf_len: set info command buffer length * @path: dentry path for get ea + * @get_write: get write access to a mount * * Return: 0 on success, otherwise error */ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, - const struct path *path) + const struct path *path, bool get_write) { struct mnt_idmap *idmap = mnt_idmap(path->mnt); char *attr_name = NULL, *value; @@ -3015,7 +3016,7 @@ int smb2_open(struct ksmbd_work *work) rc = smb2_set_ea(&ea_buf->ea, le32_to_cpu(ea_buf->ccontext.DataLength), - &path); + &path, false); if (rc == -EOPNOTSUPP) rc = 0; else if (rc) @@ -5580,6 +5581,7 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; + smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); out: kfree(new_name); @@ -5992,7 +5994,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp, return -EINVAL; return smb2_set_ea((struct smb2_ea_info *)req->Buffer, - buf_len, &fp->filp->f_path); + buf_len, &fp->filp->f_path, true); } case FILE_POSITION_INFORMATION: { diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index b49d47bdafc94..f29bb03f0dc47 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); -static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { +static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, @@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) return -EPERM; #endif - if (type >= KSMBD_EVENT_MAX) { + if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 2f48c58d47cdd..be45972ff95d9 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1225,6 +1225,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); out_inode: + /* Free inode->i_link before inode is marked as bad. */ + fscrypt_free_inode(inode); make_bad_inode(inode); iput(inode); out_fname: diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 819a3568b28f4..13007b6bc9f33 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1503,6 +1503,18 @@ xfs_fs_fill_super( mp->m_super = sb; + /* + * Copy VFS mount flags from the context now that all parameter parsing + * is guaranteed to have been completed by either the old mount API or + * the newer fsopen/fsconfig API. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (fc->sb_flags & SB_DIRSYNC) + mp->m_features |= XFS_FEAT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_features |= XFS_FEAT_WSYNC; + error = xfs_fs_validate_params(mp); if (error) return error; @@ -1972,6 +1984,11 @@ static const struct fs_context_operations xfs_context_ops = { .free = xfs_fs_free, }; +/* + * WARNING: do not initialise any parameters in this function that depend on + * mount option parsing having already been performed as this can be called from + * fsopen() before any parameters have been set. + */ static int xfs_init_fs_context( struct fs_context *fc) { @@ -2003,16 +2020,6 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ - /* - * Copy binary VFS mount flags we are interested in. - */ - if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - if (fc->sb_flags & SB_DIRSYNC) - mp->m_features |= XFS_FEAT_DIRSYNC; - if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_features |= XFS_FEAT_WSYNC; - fc->s_fs_info = mp; fc->ops = &xfs_context_ops; diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 9813fa759b75d..0a72b13781f13 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -110,6 +110,15 @@ enum drm_driver_feature { * Driver supports user defined GPU VA bindings for GEM objects. */ DRIVER_GEM_GPUVA = BIT(8), + /** + * @DRIVER_CURSOR_HOTSPOT: + * + * Driver supports and requires cursor hotspot information in the + * cursor plane (e.g. cursor plane has to actually track the mouse + * cursor and the clients are required to set hotspot in order for + * the cursor planes to work correctly). + */ + DRIVER_CURSOR_HOTSPOT = BIT(9), /* IMPORTANT: Below are all the legacy flags, add new ones above. */ diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 9c47a43f42a62..c8c2a63b9e7e0 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -228,6 +228,18 @@ struct drm_file { */ bool is_master; + /** + * @supports_virtualized_cursor_plane: + * + * This client is capable of handling the cursor plane with the + * restrictions imposed on it by the virtualized drivers. + * + * This implies that the cursor plane has to behave like a cursor + * i.e. track cursor movement. It also requires setting of the + * hotspot properties by the client on the cursor plane. + */ + bool supports_virtualized_cursor_plane; + /** * @master: * diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 79d62856defbf..fef775200a81f 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -190,6 +190,16 @@ struct drm_plane_state { */ struct drm_property_blob *fb_damage_clips; + /** + * @ignore_damage_clips: + * + * Set by drivers to indicate the drm_atomic_helper_damage_iter_init() + * helper that the @fb_damage_clips blob property should be ignored. + * + * See :ref:`damage_tracking_properties` for more information. + */ + bool ignore_damage_clips; + /** * @src: * diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fcf..33c9ff4afb492 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9ca..31561e7897157 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -120,6 +120,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +231,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +257,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +277,29 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +478,24 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b6e58dab8e275..2d84d820a7ba2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -300,6 +300,17 @@ struct bpf_func_state { bool in_callback_fn; struct tnum callback_ret_range; bool in_async_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; @@ -372,10 +383,25 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + bool used_as_loop_entry; /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; + /* If this state is a part of states loop this field points to some + * parent of this state such that: + * - it is also a member of the same states loop; + * - DFS states traversal starting from initial state visits loop_entry + * state before this state. + * Used to compute topmost loop entry for state loops. + * State loops might appear because of open coded iterators logic. + * See get_loop_entry() for more information. + */ + struct bpf_verifier_state *loop_entry; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. @@ -383,6 +409,8 @@ struct bpf_verifier_state { */ struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; + u32 dfs_depth; + u32 callback_unroll_depth; }; #define bpf_get_spilled_reg(slot, frame) \ @@ -490,6 +518,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2bc..77db4263d68d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1285,6 +1285,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2b8d85aae0832..74c60f9446f84 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -171,6 +171,8 @@ LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) +LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, + unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087bb..34dfcc77f505a 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1e00c2436377f..3fb428ce7d1c7 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 @@ -131,6 +132,7 @@ struct mlx5_flow_handle; enum { FLOW_CONTEXT_HAS_TAG = BIT(0), + FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1), }; struct mlx5_flow_context { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8ac6ae79e0835..51eb83f779388 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3536,7 +3536,7 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_81[0x1]; + u8 uplink_hairpin_en[0x1]; u8 flow_source[0x2]; u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b6b688b229ea9..f43a1cdcc6e75 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1774,6 +1774,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { + struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif @@ -1967,7 +1968,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); - return test_bit(idx, ms->usage->subsection_map); + return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) @@ -1991,6 +1992,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the @@ -2004,13 +2006,19 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - if (!valid_section(ms)) + rcu_read_lock(); + if (!valid_section(ms)) { + rcu_read_unlock(); return 0; + } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ - return early_section(ms) || pfn_section_valid(ms, pfn); + ret = early_section(ms) || pfn_section_valid(ms, pfn); + rcu_read_unlock(); + + return ret; } #endif diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index c29ace15a053a..9d0fc5109af66 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1265,6 +1265,7 @@ struct nand_secure_region { * @cont_read: Sequential page read internals * @cont_read.ongoing: Whether a continuous read is ongoing or not * @cont_read.first_page: Start of the continuous read operation + * @cont_read.pause_page: End of the current sequential cache read operation * @cont_read.last_page: End of the continuous read operation * @controller: The hardware controller structure which is shared among multiple * independent devices @@ -1321,6 +1322,7 @@ struct nand_chip { struct { bool ongoing; unsigned int first_page; + unsigned int pause_page; unsigned int last_page; } cont_read; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 608a9eb86bff8..288a8081a9db6 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -124,6 +124,22 @@ struct pipe_buf_operations { bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 51cc21ebb568b..b1fb58b435a98 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -261,8 +261,8 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound, * guarantee the pinned page won't be randomly replaced in the * future on write faults. */ - if (likely(!is_device_private_page(page) && - unlikely(page_needs_cow_for_dma(vma, page)))) + if (likely(!is_device_private_page(page)) && + unlikely(page_needs_cow_for_dma(vma, page))) return -EBUSY; ClearPageAnonExclusive(page); diff --git a/include/linux/security.h b/include/linux/security.h index 5f16eecde00bc..b50c7f5661890 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -389,6 +389,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); int security_mmap_addr(unsigned long addr); @@ -987,6 +989,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } +static inline int security_file_ioctl_compat(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index bb6f073bc1591..bbbafc0feb5b0 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -588,6 +588,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c953b8c0d2f43..bd4418377bacf 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -500,12 +500,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 4f3d14bb15385..c383579a008ba 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -886,7 +886,8 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @link_id: Link id number, can be 0 to N, unique for each Master + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller * @id: bus system-wide unique id * @slaves: list of Slaves on this bus * @assigned: Bitmap for Slave device numbers. @@ -918,6 +919,7 @@ struct sdw_master_ops { struct sdw_bus { struct device *dev; struct sdw_master_device *md; + int controller_id; unsigned int link_id; int id; struct list_head slaves; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e3f7ee169c086..5acb77968902d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -139,6 +139,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f89..bbbd6fac3aff7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -125,6 +125,7 @@ struct cachestat; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) diff --git a/include/media/v4l2-cci.h b/include/media/v4l2-cci.h index 0f6803e4b17e9..8b0b361b464cc 100644 --- a/include/media/v4l2-cci.h +++ b/include/media/v4l2-cci.h @@ -7,6 +7,8 @@ #ifndef _V4L2_CCI_H #define _V4L2_CCI_H +#include +#include #include struct i2c_client; @@ -33,11 +35,20 @@ struct cci_reg_sequence { #define CCI_REG_WIDTH_SHIFT 16 #define CCI_REG_WIDTH_MASK GENMASK(19, 16) +#define CCI_REG_WIDTH_BYTES(x) FIELD_GET(CCI_REG_WIDTH_MASK, x) +#define CCI_REG_WIDTH(x) (CCI_REG_WIDTH_BYTES(x) << 3) +#define CCI_REG_ADDR(x) FIELD_GET(CCI_REG_ADDR_MASK, x) +#define CCI_REG_LE BIT(20) + #define CCI_REG8(x) ((1 << CCI_REG_WIDTH_SHIFT) | (x)) #define CCI_REG16(x) ((2 << CCI_REG_WIDTH_SHIFT) | (x)) #define CCI_REG24(x) ((3 << CCI_REG_WIDTH_SHIFT) | (x)) #define CCI_REG32(x) ((4 << CCI_REG_WIDTH_SHIFT) | (x)) #define CCI_REG64(x) ((8 << CCI_REG_WIDTH_SHIFT) | (x)) +#define CCI_REG16_LE(x) (CCI_REG_LE | (2U << CCI_REG_WIDTH_SHIFT) | (x)) +#define CCI_REG24_LE(x) (CCI_REG_LE | (3U << CCI_REG_WIDTH_SHIFT) | (x)) +#define CCI_REG32_LE(x) (CCI_REG_LE | (4U << CCI_REG_WIDTH_SHIFT) | (x)) +#define CCI_REG64_LE(x) (CCI_REG_LE | (8U << CCI_REG_WIDTH_SHIFT) | (x)) /** * cci_read() - Read a value from a single CCI register diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b881..01a73bf74fa19 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -347,4 +347,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk) return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops; } +static inline void inet_init_csk_locks(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + spin_lock_init(&icsk->icsk_accept_queue.rskq_lock); + spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a0278..2790ba58ffe5c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -301,11 +301,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) #define inet_assign_bit(nr, sk, val) \ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) -static inline bool sk_is_inet(struct sock *sk) -{ - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -} - /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 7e73f8e5e4970..1d55ba7c45be1 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f232512505f89..e940debac4003 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -376,6 +376,10 @@ struct tcf_proto_ops { struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); + void (*tmplt_reoffload)(struct tcf_chain *chain, + bool add, + flow_setup_cb_t *cb, + void *cb_priv); struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, u32 handle); diff --git a/include/net/sock.h b/include/net/sock.h index 70a771d964676..e70c903b04f30 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2793,9 +2793,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_inet(const struct sock *sk) +{ + int family = READ_ONCE(sk->sk_family); + + return family == AF_INET || family == AF_INET6; +} + static inline bool sk_is_tcp(const struct sock *sk) { - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; + return sk_is_inet(sk) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk_is_inet(sk) && + sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; } static inline bool sk_is_stream_unix(const struct sock *sk) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 1f6fc8c7a84c6..5425f7ad5ebde 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -147,11 +147,29 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return ret; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ + struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); + + list_del(&xskb->xskb_list_node); +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); + struct xdp_buff_xsk *frag; + + frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, + xskb_list_node); + return &frag->xdp; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; xdp->data_meta = xdp->data; xdp->data_end = xdp->data + size; + xdp->flags = 0; } static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, @@ -309,6 +327,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return NULL; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + return NULL; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dbb8b96da50d3..6f776faaa791c 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -612,6 +612,9 @@ struct btrfs_ioctl_clone_range_args { */ #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ + BTRFS_DEFRAG_RANGE_START_IO) + struct btrfs_ioctl_defrag_range_args { /* start of the defrag operation */ __u64 start; diff --git a/kernel/async.c b/kernel/async.c index b2c4ba5686ee4..673bba6bdf3a0 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -145,6 +145,39 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } +static async_cookie_t __async_schedule_node_domain(async_func_t func, + void *data, int node, + struct async_domain *domain, + struct async_entry *entry) +{ + async_cookie_t newcookie; + unsigned long flags; + + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); + INIT_WORK(&entry->work, async_run_entry_fn); + entry->func = func; + entry->data = data; + entry->domain = domain; + + spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ + newcookie = entry->cookie = next_cookie++; + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); + + return newcookie; +} + /** * async_schedule_node_domain - NUMA specific version of async_schedule_domain * @func: function to execute asynchronously @@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, func(data, newcookie); return newcookie; } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); - INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = func; - entry->data = data; - entry->domain = domain; - - spin_lock_irqsave(&async_lock, flags); - - /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; - - list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); - - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - return newcookie; + return __async_schedule_node_domain(func, data, node, domain, entry); } EXPORT_SYMBOL_GPL(async_schedule_node_domain); @@ -231,6 +243,35 @@ async_cookie_t async_schedule_node(async_func_t func, void *data, int node) } EXPORT_SYMBOL_GPL(async_schedule_node); +/** + * async_schedule_dev_nocall - A simplified variant of async_schedule_dev() + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. + * + * If the asynchronous execution of @func is scheduled successfully, return + * true. Otherwise, do nothing and return false, unlike async_schedule_dev() + * that will run the function synchronously then. + */ +bool async_schedule_dev_nocall(async_func_t func, struct device *dev) +{ + struct async_entry *entry; + + entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL); + + /* Give up if there is no memory or too much work. */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + return false; + } + + __async_schedule_node_domain(func, dev, dev_to_node(dev), + &async_dfl_domain, entry); + return true; +} + /** * async_synchronize_full - synchronize all asynchronous function calls * diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8090d7fb11ef6..a31704a6bb614 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7832,6 +7832,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980d..ac37bd53aee03 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,6 +1450,9 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program @@ -1462,6 +1465,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,6 +1477,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). @@ -1483,11 +1488,19 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 97fd1766818b0..a7901ed358a0f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -542,12 +542,11 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_dynptr_data; } -static bool is_callback_calling_kfunc(u32 btf_id); +static bool is_sync_callback_calling_kfunc(u32 btf_id); -static bool is_callback_calling_function(enum bpf_func_id func_id) +static bool is_sync_callback_calling_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_for_each_map_elem || - func_id == BPF_FUNC_timer_set_callback || func_id == BPF_FUNC_find_vma || func_id == BPF_FUNC_loop || func_id == BPF_FUNC_user_ringbuf_drain; @@ -558,6 +557,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_timer_set_callback; } +static bool is_callback_calling_function(enum bpf_func_id func_id) +{ + return is_sync_callback_calling_function(func_id) || + is_async_callback_calling_function(func_id); +} + +static bool is_sync_callback_calling_insn(struct bpf_insn *insn) +{ + return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || + (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -1771,6 +1782,9 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; + dst_state->dfs_depth = src->dfs_depth; + dst_state->callback_unroll_depth = src->callback_unroll_depth; + dst_state->used_as_loop_entry = src->used_as_loop_entry; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1786,11 +1800,203 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, return 0; } +static u32 state_htab_size(struct bpf_verifier_env *env) +{ + return env->prog->len; +} + +static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_func_state *state = cur->frame[cur->curframe]; + + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; +} + +static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b) +{ + int fr; + + if (a->curframe != b->curframe) + return false; + + for (fr = a->curframe; fr >= 0; fr--) + if (a->frame[fr]->callsite != b->frame[fr]->callsite) + return false; + + return true; +} + +/* Open coded iterators allow back-edges in the state graph in order to + * check unbounded loops that iterators. + * + * In is_state_visited() it is necessary to know if explored states are + * part of some loops in order to decide whether non-exact states + * comparison could be used: + * - non-exact states comparison establishes sub-state relation and uses + * read and precision marks to do so, these marks are propagated from + * children states and thus are not guaranteed to be final in a loop; + * - exact states comparison just checks if current and explored states + * are identical (and thus form a back-edge). + * + * Paper "A New Algorithm for Identifying Loops in Decompilation" + * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient + * algorithm for loop structure detection and gives an overview of + * relevant terminology. It also has helpful illustrations. + * + * [1] https://api.semanticscholar.org/CorpusID:15784067 + * + * We use a similar algorithm but because loop nested structure is + * irrelevant for verifier ours is significantly simpler and resembles + * strongly connected components algorithm from Sedgewick's textbook. + * + * Define topmost loop entry as a first node of the loop traversed in a + * depth first search starting from initial state. The goal of the loop + * tracking algorithm is to associate topmost loop entries with states + * derived from these entries. + * + * For each step in the DFS states traversal algorithm needs to identify + * the following situations: + * + * initial initial initial + * | | | + * V V V + * ... ... .---------> hdr + * | | | | + * V V | V + * cur .-> succ | .------... + * | | | | | | + * V | V | V V + * succ '-- cur | ... ... + * | | | + * | V V + * | succ <- cur + * | | + * | V + * | ... + * | | + * '----' + * + * (A) successor state of cur (B) successor state of cur or it's entry + * not yet traversed are in current DFS path, thus cur and succ + * are members of the same outermost loop + * + * initial initial + * | | + * V V + * ... ... + * | | + * V V + * .------... .------... + * | | | | + * V V V V + * .-> hdr ... ... ... + * | | | | | + * | V V V V + * | succ <- cur succ <- cur + * | | | + * | V V + * | ... ... + * | | | + * '----' exit + * + * (C) successor state of cur is a part of some loop but this loop + * does not include cur or successor state is not in a loop at all. + * + * Algorithm could be described as the following python code: + * + * traversed = set() # Set of traversed nodes + * entries = {} # Mapping from node to loop entry + * depths = {} # Depth level assigned to graph node + * path = set() # Current DFS path + * + * # Find outermost loop entry known for n + * def get_loop_entry(n): + * h = entries.get(n, None) + * while h in entries and entries[h] != h: + * h = entries[h] + * return h + * + * # Update n's loop entry if h's outermost entry comes + * # before n's outermost entry in current DFS path. + * def update_loop_entry(n, h): + * n1 = get_loop_entry(n) or n + * h1 = get_loop_entry(h) or h + * if h1 in path and depths[h1] <= depths[n1]: + * entries[n] = h1 + * + * def dfs(n, depth): + * traversed.add(n) + * path.add(n) + * depths[n] = depth + * for succ in G.successors(n): + * if succ not in traversed: + * # Case A: explore succ and update cur's loop entry + * # only if succ's entry is in current DFS path. + * dfs(succ, depth + 1) + * h = get_loop_entry(succ) + * update_loop_entry(n, h) + * else: + * # Case B or C depending on `h1 in path` check in update_loop_entry(). + * update_loop_entry(n, succ) + * path.remove(n) + * + * To adapt this algorithm for use with verifier: + * - use st->branch == 0 as a signal that DFS of succ had been finished + * and cur's loop entry has to be updated (case A), handle this in + * update_branch_counts(); + * - use st->branch > 0 as a signal that st is in the current DFS path; + * - handle cases B and C in is_state_visited(); + * - update topmost loop entry for intermediate states in get_loop_entry(). + */ +static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st) +{ + struct bpf_verifier_state *topmost = st->loop_entry, *old; + + while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) + topmost = topmost->loop_entry; + /* Update loop entries for intermediate states to avoid this + * traversal in future get_loop_entry() calls. + */ + while (st && st->loop_entry != topmost) { + old = st->loop_entry; + st->loop_entry = topmost; + st = old; + } + return topmost; +} + +static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) +{ + struct bpf_verifier_state *cur1, *hdr1; + + cur1 = get_loop_entry(cur) ?: cur; + hdr1 = get_loop_entry(hdr) ?: hdr; + /* The head1->branches check decides between cases B and C in + * comment for get_loop_entry(). If hdr1->branches == 0 then + * head's topmost loop entry is not in current DFS path, + * hence 'cur' and 'hdr' are not in the same loop and there is + * no need to update cur->loop_entry. + */ + if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) { + cur->loop_entry = hdr; + hdr->used_as_loop_entry = true; + } +} + static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) { while (st) { u32 br = --st->branches; + /* br == 0 signals that DFS exploration for 'st' is finished, + * thus it is necessary to update parent's loop entry if it + * turned out that st is a part of some loop. + * This is a part of 'case A' in get_loop_entry() comment. + */ + if (br == 0 && st->parent && st->loop_entry) + update_loop_entry(st->parent, st->loop_entry); + /* WARN_ON(br > 1) technically makes sense here, * but see comment in push_stack(), hence: */ @@ -3127,13 +3333,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env, reg->subreg_def = DEF_NOT_SUBREG; } -static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, - enum reg_arg_type t) +static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, + enum reg_arg_type t) { - struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *reg; bool rw64; if (regno >= MAX_BPF_REG) { @@ -3174,6 +3378,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return 0; } +static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, + enum reg_arg_type t) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + + return __check_reg_arg(env, state->regs, regno, t); +} + static void mark_jmp_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].jmp_point = true; @@ -3412,6 +3625,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) } } +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@ -3587,16 +3802,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, return -EFAULT; return 0; } - } else if ((bpf_helper_call(insn) && - is_callback_calling_function(insn->imm) && - !is_async_callback_calling_function(insn->imm)) || - (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { - /* callback-calling helper or kfunc call, which means - * we are exiting from subprog, but unlike the subprog - * call handling above, we shouldn't propagate - * precision of r1-r5 (if any requested), as they are - * not actually arguments passed directly to callback - * subprogs + } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { + /* exit from callback subprog to callback-calling helper or + * kfunc call. Use idx/subseq_idx check to discern it from + * straight line code backtracking. + * Unlike the subprog call handling above, we shouldn't + * propagate precision of r1-r5 (if any requested), as they are + * not actually arguments passed directly to callback subprogs */ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); @@ -3631,10 +3843,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, } else if (opcode == BPF_EXIT) { bool r0_precise; + /* Backtracking to a nested function call, 'idx' is a part of + * the inner frame 'subseq_idx' is a part of the outer frame. + * In case of a regular function call, instructions giving + * precision to registers R1-R5 should have been found already. + * In case of a callback, it is ok to have R1-R5 marked for + * backtracking, as these registers are set by the function + * invoking callback. + */ + if (subseq_idx >= 0 && calls_callback(env, subseq_idx)) + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + bt_clear_reg(bt, i); if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - /* if backtracing was looking for registers R1-R5 - * they should have been found already. - */ verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; @@ -7527,6 +7747,81 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id return 0; } +/* Look for a previous loop entry at insn_idx: nearest parent state + * stopped at insn_idx with callsites matching those in cur->frame. + */ +static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, + struct bpf_verifier_state *cur, + int insn_idx) +{ + struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *st; + + /* Explored states are pushed in stack order, most recent states come first */ + sl = *explored_state(env, insn_idx); + for (; sl; sl = sl->next) { + /* If st->branches != 0 state is a part of current DFS verification path, + * hence cur & st for a loop. + */ + st = &sl->state; + if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) && + st->dfs_depth < cur->dfs_depth) + return st; + } + + return NULL; +} + +static void reset_idmap_scratch(struct bpf_verifier_env *env); +static bool regs_exact(const struct bpf_reg_state *rold, + const struct bpf_reg_state *rcur, + struct bpf_idmap *idmap); + +static void maybe_widen_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *rold, struct bpf_reg_state *rcur, + struct bpf_idmap *idmap) +{ + if (rold->type != SCALAR_VALUE) + return; + if (rold->type != rcur->type) + return; + if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap)) + return; + __mark_reg_unknown(env, rcur); +} + +static int widen_imprecise_scalars(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) +{ + struct bpf_func_state *fold, *fcur; + int i, fr; + + reset_idmap_scratch(env); + for (fr = old->curframe; fr >= 0; fr--) { + fold = old->frame[fr]; + fcur = cur->frame[fr]; + + for (i = 0; i < MAX_BPF_REG; i++) + maybe_widen_reg(env, + &fold->regs[i], + &fcur->regs[i], + &env->idmap_scratch); + + for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) { + if (!is_spilled_reg(&fold->stack[i]) || + !is_spilled_reg(&fcur->stack[i])) + continue; + + maybe_widen_reg(env, + &fold->stack[i].spilled_ptr, + &fcur->stack[i].spilled_ptr, + &env->idmap_scratch); + } + } + return 0; +} + /* process_iter_next_call() is called when verifier gets to iterator's next * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer * to it as just "iter_next()" in comments below. @@ -7568,25 +7863,47 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id * is some statically known limit on number of iterations (e.g., if there is * an explicit `if n > 100 then break;` statement somewhere in the loop). * - * One very subtle but very important aspect is that we *always* simulate NULL - * condition first (as the current state) before we simulate non-NULL case. - * This has to do with intricacies of scalar precision tracking. By simulating - * "exit condition" of iter_next() returning NULL first, we make sure all the - * relevant precision marks *that will be set **after** we exit iterator loop* - * are propagated backwards to common parent state of NULL and non-NULL - * branches. Thanks to that, state equivalence checks done later in forked - * state, when reaching iter_next() for ACTIVE iterator, can assume that - * precision marks are finalized and won't change. Because simulating another - * ACTIVE iterator iteration won't change them (because given same input - * states we'll end up with exactly same output states which we are currently - * comparing; and verification after the loop already propagated back what - * needs to be **additionally** tracked as precise). It's subtle, grok - * precision tracking for more intuitive understanding. + * Iteration convergence logic in is_state_visited() relies on exact + * states comparison, which ignores read and precision marks. + * This is necessary because read and precision marks are not finalized + * while in the loop. Exact comparison might preclude convergence for + * simple programs like below: + * + * i = 0; + * while(iter_next(&it)) + * i++; + * + * At each iteration step i++ would produce a new distinct state and + * eventually instruction processing limit would be reached. + * + * To avoid such behavior speculatively forget (widen) range for + * imprecise scalar registers, if those registers were not precise at the + * end of the previous iteration and do not match exactly. + * + * This is a conservative heuristic that allows to verify wide range of programs, + * however it precludes verification of programs that conjure an + * imprecise value on the first loop iteration and use it as precise on a second. + * For example, the following safe program would fail to verify: + * + * struct bpf_num_iter it; + * int arr[10]; + * int i = 0, a = 0; + * bpf_iter_num_new(&it, 0, 10); + * while (bpf_iter_num_next(&it)) { + * if (a == 0) { + * a = 1; + * i = 7; // Because i changed verifier would forget + * // it's range on second loop entry. + * } else { + * arr[i] = 42; // This would fail to verify. + * } + * } + * bpf_iter_num_destroy(&it); */ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, struct bpf_kfunc_call_arg_meta *meta) { - struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; struct bpf_reg_state *cur_iter, *queued_iter; int iter_frameno = meta->iter.frameno; @@ -7604,6 +7921,19 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, } if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { + /* Because iter_next() call is a checkpoint is_state_visitied() + * should guarantee parent state with same call sites and insn_idx. + */ + if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx || + !same_callsites(cur_st->parent, cur_st)) { + verbose(env, "bug: bad parent state for iter next call"); + return -EFAULT; + } + /* Note cur_st->parent in the call below, it is necessary to skip + * checkpoint created for cur_st by is_state_visited() + * right at this instruction. + */ + prev_st = find_prev_entry(env, cur_st->parent, insn_idx); /* branch out active iter state */ queued_st = push_stack(env, insn_idx + 1, insn_idx, false); if (!queued_st) @@ -7612,6 +7942,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; queued_iter->iter.depth++; + if (prev_st) + widen_imprecise_scalars(env, prev_st, queued_st); queued_fr = queued_st->frame[queued_st->curframe]; mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); @@ -8841,7 +9173,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env, /* after the call registers r0 - r5 were scratched */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); - check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); + __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK); } } @@ -8854,11 +9186,10 @@ static int set_callee_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, int insn_idx); -static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx, int subprog, - set_callee_state_fn set_callee_state_cb) +static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite, + set_callee_state_fn set_callee_state_cb, + struct bpf_verifier_state *state) { - struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *caller, *callee; int err; @@ -8868,53 +9199,71 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -E2BIG; } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", state->curframe + 1); return -EFAULT; } + caller = state->frame[state->curframe]; + callee = kzalloc(sizeof(*callee), GFP_KERNEL); + if (!callee) + return -ENOMEM; + state->frame[state->curframe + 1] = callee; + + /* callee cannot access r0, r6 - r9 for reading and has to write + * into its own stack before reading from it. + * callee can read/write into caller's stack + */ + init_func_state(env, callee, + /* remember the callsite, it will be used by bpf_exit */ + callsite, + state->curframe + 1 /* frameno within this callchain */, + subprog /* subprog number within this prog */); + /* Transfer references to the callee */ + err = copy_reference_state(callee, caller); + err = err ?: set_callee_state_cb(env, caller, callee, callsite); + if (err) + goto err_out; + + /* only increment it after check_reg_arg() finished */ + state->curframe++; + + return 0; + +err_out: + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return err; +} + +static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) +{ + struct bpf_verifier_state *state = env->cur_state, *callback_state; + struct bpf_func_state *caller, *callee; + int err; + + caller = state->frame[state->curframe]; err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; - if (subprog_is_global(env, subprog)) { - if (err) { - verbose(env, "Caller passes invalid args into func#%d\n", - subprog); - return err; - } else { - if (env->log.level & BPF_LOG_LEVEL) - verbose(env, - "Func#%d is global and valid. Skipping.\n", - subprog); - clear_caller_saved_regs(env, caller->regs); - - /* All global functions return a 64-bit SCALAR_VALUE */ - mark_reg_unknown(env, caller->regs, BPF_REG_0); - caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; - - /* continue with next insn after call */ - return 0; - } - } /* set_callee_state is used for direct subprog calls, but we are * interested in validating only BPF helpers that can call subprogs as * callbacks */ - if (set_callee_state_cb != set_callee_state) { - if (bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_kfunc(insn->imm)) { - verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } else if (!bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_function(insn->imm)) { /* helper */ - verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } + if (bpf_pseudo_kfunc_call(insn) && + !is_sync_callback_calling_kfunc(insn->imm)) { + verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } else if (!bpf_pseudo_kfunc_call(insn) && + !is_callback_calling_function(insn->imm)) { /* helper */ + verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; } if (insn->code == (BPF_JMP | BPF_CALL) && @@ -8925,53 +9274,83 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* there is no real recursion here. timer callbacks are async */ env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, - *insn_idx, subprog); + insn_idx, subprog); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; callee->async_entry_cnt = caller->async_entry_cnt + 1; /* Convert bpf_timer_set_callback() args into timer callback args */ - err = set_callee_state_cb(env, caller, callee, *insn_idx); + err = set_callee_state_cb(env, caller, callee, insn_idx); if (err) return err; + return 0; + } + + /* for callback functions enqueue entry to callback and + * proceed with next instruction within current frame. + */ + callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); + if (!callback_state) + return -ENOMEM; + + err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, + callback_state); + if (err) + return err; + + callback_state->callback_unroll_depth++; + callback_state->frame[callback_state->curframe - 1]->callback_depth++; + caller->callback_depth = 0; + return 0; +} + +static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller; + int err, subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", target_insn); + return -EFAULT; + } + + caller = state->frame[state->curframe]; + err = btf_check_subprog_call(env, subprog, caller->regs); + if (err == -EFAULT) + return err; + if (subprog_is_global(env, subprog)) { + if (err) { + verbose(env, "Caller passes invalid args into func#%d\n", subprog); + return err; + } + + if (env->log.level & BPF_LOG_LEVEL) + verbose(env, "Func#%d is global and valid. Skipping.\n", subprog); clear_caller_saved_regs(env, caller->regs); + + /* All global functions return a 64-bit SCALAR_VALUE */ mark_reg_unknown(env, caller->regs, BPF_REG_0); caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* continue with next insn after call */ return 0; } - callee = kzalloc(sizeof(*callee), GFP_KERNEL); - if (!callee) - return -ENOMEM; - state->frame[state->curframe + 1] = callee; - - /* callee cannot access r0, r6 - r9 for reading and has to write - * into its own stack before reading from it. - * callee can read/write into caller's stack + /* for regular function entry setup new frame and continue + * from that frame. */ - init_func_state(env, callee, - /* remember the callsite, it will be used by bpf_exit */ - *insn_idx /* callsite */, - state->curframe + 1 /* frameno within this callchain */, - subprog /* subprog number within this prog */); - - /* Transfer references to the callee */ - err = copy_reference_state(callee, caller); - if (err) - goto err_out; - - err = set_callee_state_cb(env, caller, callee, *insn_idx); + err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); if (err) - goto err_out; + return err; clear_caller_saved_regs(env, caller->regs); - /* only increment it after check_reg_arg() finished */ - state->curframe++; - /* and go analyze first insn of the callee */ *insn_idx = env->subprog_info[subprog].start - 1; @@ -8979,14 +9358,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn verbose(env, "caller:\n"); print_verifier_state(env, caller, true); verbose(env, "callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, state->frame[state->curframe], true); } - return 0; -err_out: - free_func_state(callee); - state->frame[state->curframe + 1] = NULL; - return err; + return 0; } int map_set_for_each_callback_args(struct bpf_verifier_env *env, @@ -9030,22 +9405,6 @@ static int set_callee_state(struct bpf_verifier_env *env, return 0; } -static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) -{ - int subprog, target_insn; - - target_insn = *insn_idx + insn->imm + 1; - subprog = find_subprog(env, target_insn); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn); - return -EFAULT; - } - - return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); -} - static int set_map_elem_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -9238,9 +9597,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { - struct bpf_verifier_state *state = env->cur_state; + struct bpf_verifier_state *state = env->cur_state, *prev_st; struct bpf_func_state *caller, *callee; struct bpf_reg_state *r0; + bool in_callback_fn; int err; callee = state->frame[state->curframe]; @@ -9276,6 +9636,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); return -EINVAL; } + if (!calls_callback(env, callee->callsite)) { + verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n", + *insn_idx, callee->callsite); + return -EFAULT; + } } else { /* return to the caller whatever r0 had in the callee */ caller->regs[BPF_REG_0] = *r0; @@ -9293,7 +9658,16 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return err; } - *insn_idx = callee->callsite + 1; + /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, + * there function call logic would reschedule callback visit. If iteration + * converges is_state_visited() would prune that visit eventually. + */ + in_callback_fn = callee->in_callback_fn; + if (in_callback_fn) + *insn_idx = callee->callsite; + else + *insn_idx = callee->callsite + 1; + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); print_verifier_state(env, callee, true); @@ -9303,6 +9677,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) /* clear everything in the callee */ free_func_state(callee); state->frame[state->curframe--] = NULL; + + /* for callbacks widen imprecise scalars to make programs like below verify: + * + * struct ctx { int i; } + * void cb(int idx, struct ctx *ctx) { ctx->i++; ... } + * ... + * struct ctx = { .i = 0; } + * bpf_loop(100, cb, &ctx, 0); + * + * This is similar to what is done in process_iter_next_call() for open + * coded iterators. + */ + prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL; + if (prev_st) { + err = widen_imprecise_scalars(env, prev_st, state); + if (err) + return err; + } return 0; } @@ -9684,24 +10076,37 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } break; case BPF_FUNC_for_each_map_elem: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_map_elem_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_map_elem_callback_state); break; case BPF_FUNC_timer_set_callback: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_timer_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_timer_callback_state); break; case BPF_FUNC_find_vma: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_find_vma_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_find_vma_callback_state); break; case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); break; case BPF_FUNC_loop: update_loop_inline_state(env, meta.subprogno); - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_loop_callback_state); + /* Verifier relies on R1 value to determine if bpf_loop() iteration + * is finished, thus mark it precise. + */ + err = mark_chain_precision(env, BPF_REG_1); + if (err) + return err; + if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_loop_callback_state); + } else { + cur_func(env)->callback_depth = 0; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "frame%d bpf_loop iteration limit reached\n", + env->cur_state->curframe); + } break; case BPF_FUNC_dynptr_from_mem: if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { @@ -9780,8 +10185,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } case BPF_FUNC_user_ringbuf_drain: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_user_ringbuf_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_user_ringbuf_callback_state); break; } @@ -10631,7 +11036,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id) btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } -static bool is_callback_calling_kfunc(u32 btf_id) +static bool is_sync_callback_calling_kfunc(u32 btf_id) { return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } @@ -11335,6 +11740,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EACCES; } + /* Check the arguments */ + err = check_kfunc_args(env, &meta, insn_idx); + if (err < 0) + return err; + + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_rbtree_add_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); @@ -11369,10 +11789,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } - /* Check the arguments */ - err = check_kfunc_args(env, &meta, insn_idx); - if (err < 0) - return err; /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -11406,16 +11822,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_rbtree_add_callback_state); - if (err) { - verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, meta.func_id); - return err; - } - } - for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); @@ -14702,21 +15108,6 @@ enum { BRANCH = 2, }; -static u32 state_htab_size(struct bpf_verifier_env *env) -{ - return env->prog->len; -} - -static struct bpf_verifier_state_list **explored_state( - struct bpf_verifier_env *env, - int idx) -{ - struct bpf_verifier_state *cur = env->cur_state; - struct bpf_func_state *state = cur->frame[cur->curframe]; - - return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; -} - static void mark_prune_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].prune_point = true; @@ -14737,6 +15128,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].force_checkpoint; } +static void mark_calls_callback(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].calls_callback = true; +} + +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].calls_callback; +} enum { DONE_EXPLORING = 0, @@ -14850,6 +15250,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) * async state will be pushed for further exploration. */ mark_prune_point(env, t); + /* For functions that invoke callbacks it is not known how many times + * callback would be called. Verifier models callback calling functions + * by repeatedly visiting callback bodies and returning to origin call + * instruction. + * In order to stop such iteration verifier needs to identify when a + * state identical some state from a previous iteration is reached. + * Check below forces creation of checkpoint before callback calling + * instruction to allow search for such identical states. + */ + if (is_sync_callback_calling_insn(insn)) { + mark_calls_callback(env, t); + mark_force_checkpoint(env, t); + mark_prune_point(env, t); + mark_jmp_point(env, t); + } if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; @@ -15523,18 +15938,14 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur) { struct bpf_verifier_state_list *sl; - int i; sl = *explored_state(env, insn); while (sl) { if (sl->state.branches) goto next; if (sl->state.insn_idx != insn || - sl->state.curframe != cur->curframe) + !same_callsites(&sl->state, cur)) goto next; - for (i = 0; i <= cur->curframe; i++) - if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) - goto next; clean_verifier_state(env, &sl->state); next: sl = sl->next; @@ -15552,8 +15963,11 @@ static bool regs_exact(const struct bpf_reg_state *rold, /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - struct bpf_reg_state *rcur, struct bpf_idmap *idmap) + struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact) { + if (exact) + return regs_exact(rold, rcur, idmap); + if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ return true; @@ -15670,7 +16084,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, struct bpf_idmap *idmap) + struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact) { int i, spi; @@ -15683,7 +16097,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { + if (exact && + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + return false; + + if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) { i += BPF_REG_SIZE - 1; /* explored state didn't use this */ continue; @@ -15733,7 +16152,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * return false to continue verification of this path */ if (!regsafe(env, &old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, idmap)) + &cur->stack[spi].spilled_ptr, idmap, exact)) return false; break; case STACK_DYNPTR: @@ -15815,16 +16234,16 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, * the current state will reach 'bpf_exit' instruction safely */ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur) + struct bpf_func_state *cur, bool exact) { int i; for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], - &env->idmap_scratch)) + &env->idmap_scratch, exact)) return false; - if (!stacksafe(env, old, cur, &env->idmap_scratch)) + if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) return false; if (!refsafe(old, cur, &env->idmap_scratch)) @@ -15833,17 +16252,23 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat return true; } +static void reset_idmap_scratch(struct bpf_verifier_env *env) +{ + env->idmap_scratch.tmp_id_gen = env->id_gen; + memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); +} + static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, - struct bpf_verifier_state *cur) + struct bpf_verifier_state *cur, + bool exact) { int i; if (old->curframe != cur->curframe) return false; - env->idmap_scratch.tmp_id_gen = env->id_gen; - memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); + reset_idmap_scratch(env); /* Verification state from speculative execution simulation * must never prune a non-speculative execution one. @@ -15873,7 +16298,7 @@ static bool states_equal(struct bpf_verifier_env *env, for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(env, old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) return false; } return true; @@ -16127,10 +16552,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl, **pprev; - struct bpf_verifier_state *cur = env->cur_state, *new; - int i, j, err, states_cnt = 0; + struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; + int i, j, n, err, states_cnt = 0; bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); bool add_new_state = force_new_state; + bool force_exact; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -16183,9 +16609,33 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * It's safe to assume that iterator loop will finish, taking into * account iter_next() contract of eventually returning * sticky NULL result. + * + * Note, that states have to be compared exactly in this case because + * read and precision marks might not be finalized inside the loop. + * E.g. as in the program below: + * + * 1. r7 = -16 + * 2. r6 = bpf_get_prandom_u32() + * 3. while (bpf_iter_num_next(&fp[-8])) { + * 4. if (r6 != 42) { + * 5. r7 = -32 + * 6. r6 = bpf_get_prandom_u32() + * 7. continue + * 8. } + * 9. r0 = r10 + * 10. r0 += r7 + * 11. r8 = *(u64 *)(r0 + 0) + * 12. r6 = bpf_get_prandom_u32() + * 13. } + * + * Here verifier would first visit path 1-3, create a checkpoint at 3 + * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does + * not have read or precision mark for r7 yet, thus inexact states + * comparison would discard current state with r7=-32 + * => unsafe memory access at 11 would not be caught. */ if (is_iter_next_insn(env, insn_idx)) { - if (states_equal(env, &sl->state, cur)) { + if (states_equal(env, &sl->state, cur, true)) { struct bpf_func_state *cur_frame; struct bpf_reg_state *iter_state, *iter_reg; int spi; @@ -16201,17 +16651,29 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) */ spi = __get_spi(iter_reg->off + iter_reg->var_off.value); iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; - if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) + if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { + update_loop_entry(cur, &sl->state); goto hit; + } } goto skip_inf_loop_check; } + if (calls_callback(env, insn_idx)) { + if (states_equal(env, &sl->state, cur, true)) + goto hit; + goto skip_inf_loop_check; + } /* attempt to detect infinite loop to avoid unnecessary doomed work */ if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur) && - !iter_active_depths_differ(&sl->state, cur)) { + states_equal(env, &sl->state, cur, false) && + !iter_active_depths_differ(&sl->state, cur) && + sl->state.callback_unroll_depth == cur->callback_unroll_depth) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); + verbose(env, "cur state:"); + print_verifier_state(env, cur->frame[cur->curframe], true); + verbose(env, "old state:"); + print_verifier_state(env, sl->state.frame[cur->curframe], true); return -EINVAL; } /* if the verifier is processing a loop, avoid adding new state @@ -16233,7 +16695,36 @@ skip_inf_loop_check: add_new_state = false; goto miss; } - if (states_equal(env, &sl->state, cur)) { + /* If sl->state is a part of a loop and this loop's entry is a part of + * current verification path then states have to be compared exactly. + * 'force_exact' is needed to catch the following case: + * + * initial Here state 'succ' was processed first, + * | it was eventually tracked to produce a + * V state identical to 'hdr'. + * .---------> hdr All branches from 'succ' had been explored + * | | and thus 'succ' has its .branches == 0. + * | V + * | .------... Suppose states 'cur' and 'succ' correspond + * | | | to the same instruction + callsites. + * | V V In such case it is necessary to check + * | ... ... if 'succ' and 'cur' are states_equal(). + * | | | If 'succ' and 'cur' are a part of the + * | V V same loop exact flag has to be set. + * | succ <- cur To check if that is the case, verify + * | | if loop entry of 'succ' is in current + * | V DFS path. + * | ... + * | | + * '----' + * + * Additional details are in the comment before get_loop_entry(). + */ + loop_entry = get_loop_entry(&sl->state); + force_exact = loop_entry && loop_entry->branches > 0; + if (states_equal(env, &sl->state, cur, force_exact)) { + if (force_exact) + update_loop_entry(cur, loop_entry); hit: sl->hit_cnt++; /* reached equivalent register/stack state, @@ -16272,13 +16763,18 @@ miss: * to keep checking from state equivalence point of view. * Higher numbers increase max_states_per_insn and verification time, * but do not meaningfully decrease insn_processed. + * 'n' controls how many times state could miss before eviction. + * Use bigger 'n' for checkpoints because evicting checkpoint states + * too early would hinder iterator convergence. */ - if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { + n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3; + if (sl->miss_cnt > sl->hit_cnt * n + n) { /* the state is unlikely to be useful. Remove it to * speed up verification */ *pprev = sl->next; - if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE && + !sl->state.used_as_loop_entry) { u32 br = sl->state.branches; WARN_ONCE(br, @@ -16347,6 +16843,7 @@ next: cur->parent = new; cur->first_insn_idx = insn_idx; + cur->dfs_depth = new->dfs_depth + 1; clear_jmp_history(cur); new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 27ca1c866f298..371eb1711d346 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -600,7 +600,7 @@ int __init early_irq_init(void) mutex_init(&desc[i].request_mutex); init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); - irq_resend_init(desc); + irq_resend_init(&desc[i]); } return arch_early_irq_init(); } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9dc728982d79a..b7246b7171b73 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1271,6 +1271,7 @@ int kernel_kexec(void) kexec_in_progress = true; kernel_restart_prepare("kexec reboot"); migrate_to_reboot_cpu(); + syscore_shutdown(); /* * migrate_to_reboot_cpu() disables CPU hotplug assuming that diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 74edbce2320ba..d71c590550d28 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -605,11 +605,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -618,7 +618,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -648,12 +648,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -662,7 +662,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -797,7 +797,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -805,12 +805,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -849,7 +849,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1131,12 +1131,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1149,7 +1149,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1334,7 +1334,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1370,7 +1370,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1389,7 +1389,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1420,7 +1420,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1428,13 +1428,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9af42eae1ba38..4fe47ed95eebc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8239b39d945bd..6e87dc764f471 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -201,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c108ed8a9804a..3052b1f1168e2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -99,6 +99,7 @@ static u64 suspend_start; * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) /* * Threshold: 0.0312s, when doubled: 0.0625s. @@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5cbd0cee83c06..55cbc49f70d14 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1548,6 +1548,7 @@ void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t idle_sleeptime, iowait_sleeptime; + unsigned long idle_calls, idle_sleeps; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) @@ -1556,9 +1557,13 @@ void tick_cancel_sched_timer(int cpu) idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; + idle_calls = ts->idle_calls; + idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; + ts->idle_calls = idle_calls; + ts->idle_sleeps = idle_sleeps; } #endif diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f95..a4dcf0f243521 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; diff --git a/lib/crypto/mpi/ec.c b/lib/crypto/mpi/ec.c index 40f5908e57a4f..e16dca1e23d52 100644 --- a/lib/crypto/mpi/ec.c +++ b/lib/crypto/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); diff --git a/mm/memblock.c b/mm/memblock.c index 913b2520a9a00..6d18485571b4a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2119,6 +2119,9 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; + if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + nid = early_pfn_to_nid(PFN_DOWN(start)); + reserve_bootmem_region(start, end, nid); } } diff --git a/mm/migrate.c b/mm/migrate.c index 03bc2063ac871..b4d972d80b10c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1026,32 +1026,31 @@ out: } /* - * To record some information during migration, we use some unused - * fields (mapping and private) of struct folio of the newly allocated - * destination folio. This is safe because nobody is using them - * except us. + * To record some information during migration, we use unused private + * field of struct folio of the newly allocated destination folio. + * This is safe because nobody is using it except us. */ -union migration_ptr { - struct anon_vma *anon_vma; - struct address_space *mapping; +enum { + PAGE_WAS_MAPPED = BIT(0), + PAGE_WAS_MLOCKED = BIT(1), + PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED, }; + static void __migrate_folio_record(struct folio *dst, - unsigned long page_was_mapped, + int old_page_state, struct anon_vma *anon_vma) { - union migration_ptr ptr = { .anon_vma = anon_vma }; - dst->mapping = ptr.mapping; - dst->private = (void *)page_was_mapped; + dst->private = (void *)anon_vma + old_page_state; } static void __migrate_folio_extract(struct folio *dst, - int *page_was_mappedp, + int *old_page_state, struct anon_vma **anon_vmap) { - union migration_ptr ptr = { .mapping = dst->mapping }; - *anon_vmap = ptr.anon_vma; - *page_was_mappedp = (unsigned long)dst->private; - dst->mapping = NULL; + unsigned long private = (unsigned long)dst->private; + + *anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES); + *old_page_state = private & PAGE_OLD_STATES; dst->private = NULL; } @@ -1111,7 +1110,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, { struct folio *dst; int rc = -EAGAIN; - int page_was_mapped = 0; + int old_page_state = 0; struct anon_vma *anon_vma = NULL; bool is_lru = !__PageMovable(&src->page); bool locked = false; @@ -1165,6 +1164,8 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, folio_lock(src); } locked = true; + if (folio_test_mlocked(src)) + old_page_state |= PAGE_WAS_MLOCKED; if (folio_test_writeback(src)) { /* @@ -1214,7 +1215,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, dst_locked = true; if (unlikely(!is_lru)) { - __migrate_folio_record(dst, page_was_mapped, anon_vma); + __migrate_folio_record(dst, old_page_state, anon_vma); return MIGRATEPAGE_UNMAP; } @@ -1240,11 +1241,11 @@ static int migrate_folio_unmap(new_folio_t get_new_folio, VM_BUG_ON_FOLIO(folio_test_anon(src) && !folio_test_ksm(src) && !anon_vma, src); try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0); - page_was_mapped = 1; + old_page_state |= PAGE_WAS_MAPPED; } if (!folio_mapped(src)) { - __migrate_folio_record(dst, page_was_mapped, anon_vma); + __migrate_folio_record(dst, old_page_state, anon_vma); return MIGRATEPAGE_UNMAP; } @@ -1256,7 +1257,8 @@ out: if (rc == -EAGAIN) ret = NULL; - migrate_folio_undo_src(src, page_was_mapped, anon_vma, locked, ret); + migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED, + anon_vma, locked, ret); migrate_folio_undo_dst(dst, dst_locked, put_new_folio, private); return rc; @@ -1269,12 +1271,12 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, struct list_head *ret) { int rc; - int page_was_mapped = 0; + int old_page_state = 0; struct anon_vma *anon_vma = NULL; bool is_lru = !__PageMovable(&src->page); struct list_head *prev; - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); + __migrate_folio_extract(dst, &old_page_state, &anon_vma); prev = dst->lru.prev; list_del(&dst->lru); @@ -1295,10 +1297,10 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private, * isolated from the unevictable LRU: but this case is the easiest. */ folio_add_lru(dst); - if (page_was_mapped) + if (old_page_state & PAGE_WAS_MLOCKED) lru_add_drain(); - if (page_was_mapped) + if (old_page_state & PAGE_WAS_MAPPED) remove_migration_ptes(src, dst, false); out_unlock_both: @@ -1330,11 +1332,12 @@ out: */ if (rc == -EAGAIN) { list_add(&dst->lru, prev); - __migrate_folio_record(dst, page_was_mapped, anon_vma); + __migrate_folio_record(dst, old_page_state, anon_vma); return rc; } - migrate_folio_undo_src(src, page_was_mapped, anon_vma, true, ret); + migrate_folio_undo_src(src, old_page_state & PAGE_WAS_MAPPED, + anon_vma, true, ret); migrate_folio_undo_dst(dst, true, put_new_folio, private); return rc; @@ -1802,12 +1805,12 @@ out: dst = list_first_entry(&dst_folios, struct folio, lru); dst2 = list_next_entry(dst, lru); list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) { - int page_was_mapped = 0; + int old_page_state = 0; struct anon_vma *anon_vma = NULL; - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); - migrate_folio_undo_src(folio, page_was_mapped, anon_vma, - true, ret_folios); + __migrate_folio_extract(dst, &old_page_state, &anon_vma); + migrate_folio_undo_src(folio, old_page_state & PAGE_WAS_MAPPED, + anon_vma, true, ret_folios); list_del(&dst->lru); migrate_folio_undo_dst(dst, true, put_new_folio, private); dst = dst2; diff --git a/mm/mm_init.c b/mm/mm_init.c index 50f2f34745afa..77fd04c83d046 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "internal.h" #include "slab.h" #include "shuffle.h" @@ -381,6 +382,11 @@ static void __init find_zone_movable_pfns_for_nodes(void) goto out; } + if (is_kdump_kernel()) { + pr_warn("The system is under kdump, ignore kernelcore=mirror.\n"); + goto out; + } + for_each_mem_region(r) { if (memblock_is_mirror(r)) continue; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 85741403948f5..afed33fd87612 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3809,14 +3809,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, else (*no_progress_loops)++; - /* - * Make sure we converge to OOM if we cannot make any progress - * several times in the row. - */ - if (*no_progress_loops > MAX_RECLAIM_RETRIES) { - /* Before OOM, exhaust highatomic_reserve */ - return unreserve_highatomic_pageblock(ac, true); - } + if (*no_progress_loops > MAX_RECLAIM_RETRIES) + goto out; + /* * Keep reclaiming pages while there is a chance this will lead @@ -3859,6 +3854,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, schedule_timeout_uninterruptible(1); else cond_resched(); +out: + /* Before OOM, exhaust highatomic_reserve */ + if (!ret) + return unreserve_highatomic_pageblock(ac, true); + return ret; } diff --git a/mm/sparse.c b/mm/sparse.c index 77d91e565045c..338cf946dee8d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -791,6 +791,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, if (empty) { unsigned long section_nr = pfn_to_section_nr(pfn); + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ + ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; + /* * When removing an early section, the usage map is kept (as the * usage maps of other sections fall into the same page). It @@ -799,16 +806,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * was allocated during boot. */ if (!PageReserved(virt_to_page(ms->usage))) { - kfree(ms->usage); - ms->usage = NULL; + kfree_rcu(ms->usage, rcu); + WRITE_ONCE(ms->usage, NULL); } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - /* - * Mark the section invalid so that valid_section() - * return false. This prevents code from dereferencing - * ms->usage array. - */ - ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } /* diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 214532173536b..a3b68243fd4b1 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) diff --git a/net/core/dev.c b/net/core/dev.c index e480afb50d4c1..d72a4ff689ca7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11491,6 +11491,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit_net(struct net *net) { + struct netdev_name_node *name_node, *tmp; struct net_device *dev, *aux; /* * Push all migratable network devices back to the @@ -11513,6 +11514,14 @@ static void __net_exit default_device_exit_net(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); + + netdev_for_each_altname_safe(dev, name_node, tmp) + if (netdev_name_in_use(&init_net, name_node->name)) { + netdev_name_node_del(name_node); + synchronize_rcu(); + __netdev_name_node_alt_destroy(name_node); + } + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/dev.h b/net/core/dev.h index fa2e9c5c41224..f2037d402144f 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -64,6 +64,9 @@ int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ list_for_each_entry((namenode), &(dev)->name_node->list, list) +#define netdev_for_each_altname_safe(dev, namenode, next) \ + list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \ + list) int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); diff --git a/net/core/filter.c b/net/core/filter.c index 90fe3e7543833..01f2417deef21 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,8 @@ #include #include #include +#include +#include static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -4079,10 +4081,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); skb_frag_size_add(frag, offset); sinfo->xdp_frags_size += offset; + if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) + xsk_buff_get_tail(xdp)->data_end += offset; return 0; } +static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, + struct xdp_mem_info *mem_info, bool release) +{ + struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); + + if (release) { + xsk_buff_del_tail(zc_frag); + __xdp_return(NULL, mem_info, false, zc_frag); + } else { + zc_frag->data_end -= shrink; + } +} + +static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, + int shrink) +{ + struct xdp_mem_info *mem_info = &xdp->rxq->mem; + bool release = skb_frag_size(frag) == shrink; + + if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) { + bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release); + goto out; + } + + if (release) { + struct page *page = skb_frag_page(frag); + + __xdp_return(page_address(page), mem_info, false, NULL); + } + +out: + return release; +} + static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); @@ -4097,12 +4135,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) len_free += shrink; offset -= shrink; - - if (skb_frag_size(frag) == shrink) { - struct page *page = skb_frag_page(frag); - - __xdp_return(page_address(page), &xdp->rxq->mem, - false, NULL); + if (bpf_xdp_shrink_data(xdp, frag, shrink)) { n_frags_free++; } else { skb_frag_size_sub(frag, shrink); @@ -11772,6 +11805,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, return 0; } + +__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const u8 *sun_path, u32 sun_path__sz) +{ + struct sockaddr_un *un; + + if (sa_kern->sk->sk_family != AF_UNIX) + return -EINVAL; + + /* We do not allow changing the address to unnamed or larger than the + * maximum allowed address size for a unix sockaddr. + */ + if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) + return -EINVAL; + + un = (struct sockaddr_un *)sa_kern->uaddr; + memcpy(un->sun_path, sun_path, sun_path__sz); + sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11796,6 +11850,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) +BTF_SET8_END(bpf_kfunc_check_set_sock_addr) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11806,6 +11864,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .set = &bpf_kfunc_check_set_xdp, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_addr, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11820,7 +11883,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); } late_initcall(bpf_kfunc_init); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index f35c2e9984062..63de5c635842b 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,9 +33,6 @@ void reqsk_queue_alloc(struct request_sock_queue *queue) { - spin_lock_init(&queue->rskq_lock); - - spin_lock_init(&queue->fastopenq.lock); queue->fastopenq.rskq_rst_head = NULL; queue->fastopenq.rskq_rst_tail = NULL; queue->fastopenq.qlen = 0; diff --git a/net/core/sock.c b/net/core/sock.c index 5cd21e699f2d6..383e30fe79f41 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -4136,8 +4137,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty_lockless(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time); + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + return true; + + if (sk_is_udp(sk) && + !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) + return true; + + return sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b0a5de1303b52..1c58bd72e1245 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -330,6 +330,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); @@ -452,7 +455,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -794,6 +797,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -806,7 +810,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -814,12 +818,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet_getname); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 394a498c28232..762817d6c8d70 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -730,6 +730,10 @@ out: } if (req) reqsk_put(req); + + if (newsk) + inet_init_csk_locks(newsk); + return newsk; out_err: newsk = NULL; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 75e0aee35eb78..4cb0c896caf97 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f124f6c639157..fb417aee86e64 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4167e8a48b60a..c7ffab37a34cd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9cb22a6ae1dca..7be4ddc80d95b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1865,7 +1867,8 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 368824fe9719f..4375bfa4f6089 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -199,6 +199,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); @@ -453,7 +456,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -519,6 +522,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -538,7 +542,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -546,13 +550,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5831aaa53d75e..25243737fbc42 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 44b6949d72b22..3783334ef2332 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f1170dcc21d93..438476a31313c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,7 +410,8 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1157,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1510,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9b06c380866b5..20551cfb7da6d 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -928,14 +928,15 @@ copy_uaddr: */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -955,22 +956,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 6e387aadffcec..4f16d9c88350b 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0c5cc75857e4f..e112300caaf75 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -398,7 +398,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { - if (!(sta->sta.valid_links & BIT(i))) + struct link_sta_info *link_sta; + + link_sta = rcu_access_pointer(sta->link[i]); + if (!link_sta) continue; sta_remove_link(sta, i, false); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b28fbcb86e949..4fc8348dd799a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -24,6 +24,7 @@ #include #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +#define NFT_SET_MAX_ANONLEN 16 unsigned int nf_tables_net_id __read_mostly; @@ -4351,6 +4352,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; + if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) + return -EINVAL; + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; @@ -10867,16 +10871,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { - default: - switch (data->verdict.code & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: - break; - default: - return -EINVAL; - } - fallthrough; + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -10911,6 +10909,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + default: + return -EINVAL; } desc->len = sizeof(data->verdict); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 680fe557686e4..274b6f7e6bb57 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5284cd2ad5327..f0eeda97bfcd9 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -350,6 +350,12 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -595,6 +601,12 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ab3362c483b4a..397351fa4d5f8 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -384,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, { unsigned int hook_mask = (1 << NF_INET_FORWARD); + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, hook_mask); } diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 79039afde34ec..cefa25e0dbb0a 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { + u64 unit, tokens, rate_with_burst; bool invert = false; - u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + if (priv->rate == 0) + return -EINVAL; + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->nsecs = unit * NSEC_PER_SEC; - if (priv->rate == 0 || priv->nsecs < unit) + if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs)) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) @@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv, if (pkts && priv->burst == 0) priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (priv->rate + priv->burst < priv->rate) + if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst)) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; + u64 tmp = div64_u64(priv->nsecs, priv->rate); + + if (check_mul_overflow(tmp, priv->burst, &tokens)) + return -EOVERFLOW; } else { + u64 tmp; + /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), - priv->rate); + if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp)) + return -EOVERFLOW; + + tokens = div64_u64(tmp, priv->rate); } if (tb[NFTA_LIMIT_FLAGS]) { diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 583885ce72328..808f5802c2704 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx, struct nft_nat *priv = nft_expr_priv(expr); int err; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 35a2c28caa60b..24d9771385729 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->key) { case NFT_RT_NEXTHOP4: case NFT_RT_NEXTHOP6: diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 9ed85be79452d..f30163e2ca620 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 13da882669a4e..1d737f89dfc18 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; @@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; @@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index ae15cd693f0ec..71412adb73d41 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); } diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 452f8587addad..1c866757db552 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index eb086b06d60da..d9107b545d360 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) - vfree(skb->head); + vfree_atomic(skb->head); skb->head = NULL; } diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 01c4cdfef45df..8435a20968ef5 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a193cc7b32418..84e18b5f72a30 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1536,6 +1536,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { + if (chain->tmplt_ops && add) + chain->tmplt_ops->tmplt_reoffload(chain, true, cb, + cb_priv); for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), @@ -1551,6 +1554,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, goto err_playback_remove; } } + if (chain->tmplt_ops && !add) + chain->tmplt_ops->tmplt_reoffload(chain, false, cb, + cb_priv); } return 0; @@ -2950,7 +2956,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, ops = tcf_proto_lookup_ops(name, true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); - if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { + if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump || + !ops->tmplt_reoffload) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); module_put(ops->owner); return -EOPNOTSUPP; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e5314a31f75ae..efb9d2811b73d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2721,6 +2721,28 @@ static void fl_tmplt_destroy(void *tmplt_priv) kfree(tmplt); } +static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct fl_flow_tmplt *tmplt = chain->tmplt_priv; + struct flow_cls_offload cls_flower = {}; + + cls_flower.rule = flow_rule_alloc(0); + if (!cls_flower.rule) + return; + + cls_flower.common.chain_index = chain->index; + cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE : + FLOW_CLS_TMPLT_DESTROY; + cls_flower.cookie = (unsigned long) tmplt; + cls_flower.rule->match.dissector = &tmplt->dissector; + cls_flower.rule->match.mask = &tmplt->mask; + cls_flower.rule->match.key = &tmplt->dummy_key; + + cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); +} + static int fl_dump_key_val(struct sk_buff *skb, void *val, int val_type, void *mask, int mask_type, int len) @@ -3628,6 +3650,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, .tmplt_destroy = fl_tmplt_destroy, + .tmplt_reoffload = fl_tmplt_reoffload, .tmplt_dump = fl_tmplt_dump, .get_exts = fl_get_exts, .owner = THIS_MODULE, diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 2c464d76b06ce..37833b96b508e 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -163,7 +163,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 998687421fa6a..e0ce4276274be 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) ARRAY_SIZE(rqstp->rq_bvec), xdr); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, - count, 0); + count, rqstp->rq_res.len); err = sock_sendmsg(svsk->sk_sock, &msg); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, - count, 0); + count, rqstp->rq_res.len); err = sock_sendmsg(svsk->sk_sock, &msg); } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 774a6d1916e40..d849dc04a3343 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -166,8 +166,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) contd = XDP_PKT_CONTD; err = __xsk_rcv_zc(xs, xskb, len, contd); - if (err || likely(!frags)) - goto out; + if (err) + goto err; + if (likely(!frags)) + return 0; xskb_list = &xskb->pool->xskb_list; list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { @@ -176,11 +178,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) len = pos->xdp.data_end - pos->xdp.data; err = __xsk_rcv_zc(xs, pos, len, contd); if (err) - return err; + goto err; list_del(&pos->xskb_list_node); } -out: + return 0; +err: + xsk_buff_free(xdp); return err; } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09bee..b0a611677865d 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -541,6 +541,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; + xskb->xdp.flags = 0; if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 0ffd5531242aa..408bfd0216da0 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -98,7 +98,7 @@ sub parse_abi { $name =~ s,.*/,,; my $fn = $file; - $fn =~ s,Documentation/ABI/,,; + $fn =~ s,.*Documentation/ABI/,,; my $nametag = "File $fn"; $data{$nametag}->{what} = "File $name"; diff --git a/security/security.c b/security/security.c index 23b129d482a7c..840a3d58a2902 100644 --- a/security/security.c +++ b/security/security.c @@ -2648,6 +2648,24 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL_GPL(security_file_ioctl); +/** + * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode + * @file: associated file + * @cmd: ioctl cmd + * @arg: ioctl arguments + * + * Compat version of security_file_ioctl() that correctly handles 32-bit + * processes running on 64-bit kernels. + * + * Return: Returns 0 if permission is granted. + */ +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return call_int_hook(file_ioctl_compat, 0, file, cmd, arg); +} +EXPORT_SYMBOL_GPL(security_file_ioctl_compat); + static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index aa0afc122f09f..53cfeefb2f194 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3725,6 +3725,33 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, return error; } +static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + /* + * If we are in a 64-bit kernel running 32-bit userspace, we need to + * make sure we don't compare 32-bit flags to 64-bit flags. + */ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + case FS_IOC32_SETVERSION: + cmd = FS_IOC_SETVERSION; + break; + default: + break; + } + + return selinux_file_ioctl(file, cmd, arg); +} + static int default_noexec __ro_after_init; static int file_map_prot_check(struct file *file, unsigned long prot, int shared) @@ -7037,6 +7064,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 65130a791f573..1f1ea8529421f 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4973,6 +4973,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, smack_file_ioctl), LSM_HOOK_INIT(file_lock, smack_file_lock), LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), LSM_HOOK_INIT(mmap_file, smack_mmap_file), diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 25006fddc964b..298d182759c2d 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -568,6 +568,7 @@ static struct security_hook_list tomoyo_hooks[] __ro_after_init = { LSM_HOOK_INIT(path_rename, tomoyo_path_rename), LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), LSM_HOOK_INIT(path_chown, tomoyo_path_chown), LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 24e966a2ac2be..9ed572141fe5b 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1197,11 +1197,11 @@ static int fill_sdw_codec_dlc(struct device *dev, else if (is_unique_device(adr_link, sdw_version, mfg_id, part_id, class_id, adr_index)) codec->name = devm_kasprintf(dev, GFP_KERNEL, - "sdw:%01x:%04x:%04x:%02x", link_id, + "sdw:0:%01x:%04x:%04x:%02x", link_id, mfg_id, part_id, class_id); else codec->name = devm_kasprintf(dev, GFP_KERNEL, - "sdw:%01x:%04x:%04x:%02x:%01x", link_id, + "sdw:0:%01x:%04x:%04x:%02x:%01x", link_id, mfg_id, part_id, class_id, unique_id); if (!codec->name) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e3e68c97b40cf..e51d11c36a211 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_helper_restricted.skel.h" #include "verifier_helper_value_access.skel.h" #include "verifier_int_ptr.skel.h" +#include "verifier_iterating_callbacks.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" @@ -138,6 +139,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } +void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87d..56c764df81967 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 6b9b3c56f009a..c20c4e38b71c5 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -14,6 +14,13 @@ int my_pid; int arr[256]; int small_arr[16] SEC(".data.small_arr"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, int); + __type(value, int); +} amap SEC(".maps"); + #ifdef REAL_TEST #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 #else @@ -716,4 +723,692 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx) return 0; } +SEC("?raw_tp") +__failure +__msg("R1 type=scalar expected=fp") +__naked int delayed_read_mark(void) +{ + /* This is equivalent to C program below. + * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead. + * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read mark. + * Loop body with r7=0xdead would only be visited if verifier would decide to continue + * with second loop iteration. Absence of read mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r7 = &fp[-16] + * fp[-16] = 0 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r6++ + * if (r6 != 42) { + * r7 = 0xdead + * continue; + * } + * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r7 = r10;" + "r7 += -16;" + "r0 = 0;" + "*(u64 *)(r7 + 0) = r0;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "r6 += 1;" + "if r6 != 42 goto 3f;" + "r7 = 0xdead;" + "goto 1b;" + "3:" + "r1 = r7;" + "r2 = 8;" + "r3 = 0xdeadbeef;" + "call %[bpf_probe_read_user];" + "goto 1b;" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__naked int delayed_precision_mark(void) +{ + /* This is equivalent to C program below. + * The test is similar to delayed_iter_mark but verifies that incomplete + * precision don't fool verifier. + * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32. + * State with r7=-16 is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read + * and precision marks. + * Loop body with r7=-32 would only be visited if verifier would decide to continue + * with second loop iteration. Absence of precision mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r8 = 0 + * fp[-16] = 0 + * r7 = -16 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (r6 != 42) { + * r7 = -32 + * r6 = bpf_get_prandom_u32() + * continue; + * } + * r0 = r10 + * r0 += r7 + * r8 = *(u64 *)(r0 + 0) // this is not safe + * r6 = bpf_get_prandom_u32() + * } + * bpf_iter_num_destroy(&fp[-8]) + * return r8 + */ + asm volatile ( + "r8 = 0;" + "*(u64 *)(r10 - 16) = r8;" + "r7 = -16;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;\n" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "if r6 != 42 goto 3f;" + "r7 = -32;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "3:" + "r0 = r10;" + "r0 += r7;" + "r8 = *(u64 *)(r0 + 0);" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = r8;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps1(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with c=-25 are explored only on a second iteration + * of the outer loop; + * - states with read+precise mark on c are explored only on + * second iteration of the inner loop and in a state which + * is pushed to states stack first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. + * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * a = 0; + * b = 0; + * c = -25; + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r6 = 0;" + "r7 = 0;" + "r8 = -25;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps2(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with read+precise mark on c are explored only on a second + * iteration of the first inner loop and in a state which is pushed to + * states stack first. + * - states with c=-25 are explored only on a second iteration of the + * second inner loop and in a state which is pushed to states stack + * first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. + * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * a = 0; + * c = -25; + * } + * } + * } + * iter_destroy(i); + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + + /* first inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + /* second inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i2_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i2_loop_end_%=;" + "check2_one_r6_%=:" + "if r6 != 1 goto check2_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i2_loop_%=;" + "check2_zero_r6_%=:" + "if r6 != 0 goto i2_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check2_one_r7_%=;" + "goto i2_loop_%=;" + "check2_one_r7_%=:" + "if r7 != 1 goto i2_loop_%=;" + "r6 = 0;" + "r8 = -25;" + "goto i2_loop_%=;" + "i2_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + "r6 = 0;" + "r7 = 0;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int triple_continue(void) +{ + /* This is equivalent to C program below. + * High branching factor of the loop body turned out to be + * problematic for one of the iterator convergence tracking + * algorithms explored. + * + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * r0 += 0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "r0 += 0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int widen_spill(void) +{ + /* This is equivalent to C program below. + * The counter is stored in fp[-16], if this counter is not widened + * verifier states representing loop iterations would never converge. + * + * fp[-16] = 0 + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r0 = fp[-16]; + * r0 += 1; + * fp[-16] = r0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "r0 = *(u64 *)(r10 - 16);" + "r0 += 1;" + "*(u64 *)(r10 - 16) = r0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("raw_tp") +__success +__naked int checkpoint_states_deletion(void) +{ + /* This is equivalent to C program below. + * + * int *a, *b, *c, *d, *e, *f; + * int i, sum = 0; + * bpf_for(i, 0, 10) { + * a = bpf_map_lookup_elem(&amap, &i); + * b = bpf_map_lookup_elem(&amap, &i); + * c = bpf_map_lookup_elem(&amap, &i); + * d = bpf_map_lookup_elem(&amap, &i); + * e = bpf_map_lookup_elem(&amap, &i); + * f = bpf_map_lookup_elem(&amap, &i); + * if (a) sum += 1; + * if (b) sum += 1; + * if (c) sum += 1; + * if (d) sum += 1; + * if (e) sum += 1; + * if (f) sum += 1; + * } + * return 0; + * + * The body of the loop spawns multiple simulation paths + * with different combination of NULL/non-NULL information for a/b/c/d/e/f. + * Each combination is unique from states_equal() point of view. + * Explored states checkpoint is created after each iterator next call. + * Iterator convergence logic expects that eventually current state + * would get equal to one of the explored states and thus loop + * exploration would be finished (at-least for a specific path). + * Verifier evicts explored states with high miss to hit ratio + * to to avoid comparing current state with too many explored + * states per instruction. + * This test is designed to "stress test" eviction policy defined using formula: + * + * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted + * + * Currently N is set to 64, which allows for 6 variables in this test. + */ + asm volatile ( + "r6 = 0;" /* a */ + "r7 = 0;" /* b */ + "r8 = 0;" /* c */ + "*(u64 *)(r10 - 24) = r6;" /* d */ + "*(u64 *)(r10 - 32) = r6;" /* e */ + "*(u64 *)(r10 - 40) = r6;" /* f */ + "r9 = 0;" /* sum */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + + "*(u64 *)(r10 - 16) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r6 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r7 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r8 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 24) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 32) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 40) = r0;" + + "if r6 == 0 goto +1;" + "r9 += 1;" + "if r7 == 0 goto +1;" + "r9 += 1;" + "if r8 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 24);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 32);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 40);" + "if r0 == 0 goto +1;" + "r9 += 1;" + + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm_addr(amap) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e02cfd3807469..40df2cc26eaf9 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -24,9 +24,11 @@ struct task_struct {}; #define STACK_TABLE_EPOCH_SHIFT 20 #define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_CFGS 32 +#define READ_MAP_VAR_PAYLOAD_CAP \ + ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) #define STROBE_MAX_PAYLOAD \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ - STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) struct strobe_value_header { /* @@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, struct strobe_value_generic *value, struct strobemeta_payload *data, - void *payload) + size_t off) { void *location; uint64_t len; @@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); /* * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is @@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; data->str_lens[idx] = len; - return len; + return off + len; } -static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, - void *payload) +static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + size_t off) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, location = calc_location(&cfg->map_locs[idx], tls_base); if (!location) - return payload; + return off; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) - return payload; + return off; descr->id = map.id; descr->cnt = map.cnt; @@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; - payload += len; + off += len; } #ifdef NO_UNROLL @@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, break; descr->key_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; - payload += len; + off += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; - payload += len; + off += len; } } - return payload; + return off; } #ifdef USE_BPF_LOOP @@ -455,14 +457,20 @@ struct read_var_ctx { struct strobemeta_payload *data; void *tls_base; struct strobemeta_cfg *cfg; - void *payload; + size_t payload_off; /* value gets mutated */ struct strobe_value_generic *value; enum read_type type; }; -static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +static int read_var_callback(__u64 index, struct read_var_ctx *ctx) { + /* lose precision info for ctx->payload_off, verifier won't track + * double xor, barrier_var() is needed to force clang keep both xors. + */ + ctx->payload_off ^= index; + barrier_var(ctx->payload_off); + ctx->payload_off ^= index; switch (ctx->type) { case READ_INT_VAR: if (index >= STROBE_MAX_INTS) @@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) case READ_MAP_VAR: if (index >= STROBE_MAX_MAPS) return 1; - ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP) + return 1; + ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; case READ_STR_VAR: if (index >= STROBE_MAX_STRS) return 1; - ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN) + return 1; + ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; } return 0; @@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; - void *tls_base, *payload; + size_t payload_off; + void *tls_base; cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); if (!cfg) @@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, data->int_vals_set_mask = 0; data->req_meta_valid = 0; - payload = data->payload; + payload_off = 0; /* * we don't have struct task_struct definition, it should be: * tls_base = (void *)task->thread.fsbase; @@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, .tls_base = tls_base, .value = &value, .data = data, - .payload = payload, + .payload_off = 0, }; int err; @@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); if (err != STROBE_MAX_MAPS) return NULL; + + payload_off = ctx.payload_off; + /* this should not really happen, here only to satisfy verifer */ + if (payload_off > sizeof(data->payload)) + payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { - payload += read_str_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { - payload = read_map_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); } #endif /* USE_BPF_LOOP */ @@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent */ - return payload; + return &data->payload[payload_off]; } SEC("raw_tracepoint/kfree_skb") diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c new file mode 100644 index 0000000000000..5905e036e0eac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 8); +} ringbuf SEC(".maps"); + +struct vm_area_struct; +struct bpf_map; + +struct buf_context { + char *buf; +}; + +struct num_context { + __u64 i; + __u64 j; +}; + +__u8 choice_arr[2] = { 0, 1 }; + +static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) +{ + if (idx == 0) { + ctx->buf = (char *)(0xDEAD); + return 0; + } + + if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) + return 1; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=fp") +int unsafe_on_2nd_iter(void *unused) +{ + char buf[4]; + struct buf_context loop_ctx = { .buf = buf }; + + bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); + return 0; +} + +static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i = 0; + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_on_zero_iter(void *unused) +{ + struct num_context loop_ctx = { .i = 32 }; + + bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int widening_cb(__u32 idx, struct num_context *ctx) +{ + ++ctx->i; + return 0; +} + +SEC("?raw_tp") +__success +int widening(void *unused) +{ + struct num_context loop_ctx = { .i = 0, .j = 1 }; + + bpf_loop(100, widening_cb, &loop_ctx, 0); + /* loop_ctx.j is not changed during callback iteration, + * verifier should not apply widening to it. + */ + return choice_arr[loop_ctx.j]; +} + +static int loop_detection_cb(__u32 idx, struct num_context *ctx) +{ + for (;;) {} + return 0; +} + +SEC("?raw_tp") +__failure __msg("infinite loop detected") +int loop_detection(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_loop(100, loop_detection_cb, &loop_ctx, 0); + return 0; +} + +static __always_inline __u64 oob_state_machine(struct num_context *ctx) +{ + switch (ctx->i) { + case 0: + ctx->i = 1; + break; + case 1: + ctx->i = 32; + break; + } + return 0; +} + +static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_for_each_map_elem(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_ringbuf_drain(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_find_vma(void *unused) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct num_context loop_ctx = { .i = 0 }; + + bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int iter_limit_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i++; + return 0; +} + +SEC("?raw_tp") +__success +int bpf_loop_iter_limit_ok(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(1, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=2 size=1") +int bpf_loop_iter_limit_overflow(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(2, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 100; + return 0; +} + +static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 10; + return 0; +} + +static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 1; + bpf_loop(1, iter_limit_level2a_cb, ctx, 0); + bpf_loop(1, iter_limit_level2b_cb, ctx, 0); + return 0; +} + +/* Check that path visiting every callback function once had been + * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, + * with each decimal digit corresponding to a callback visit marker. + */ +SEC("socket") +__success __retval(111111) +int bpf_loop_iter_limit_nested(void *unused) +{ + struct num_context ctx1 = { .i = 0 }; + struct num_context ctx2 = { .i = 0 }; + __u64 a, b, c; + + bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); + bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); + a = ctx1.i; + b = ctx2.i; + /* Force 'ctx1.i' and 'ctx2.i' precise. */ + c = choice_arr[(a + b) % 2]; + /* This makes 'c' zero, but neither clang nor verifier know it. */ + c /= 10; + /* Make sure that verifier does not visit 'impossible' states: + * enumerate all possible callback visit masks. + */ + if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && + b != 0 && b != 1 && b != 11 && b != 101 && b != 111) + asm volatile ("r0 /= 0;" ::: "r0"); + return 1000 * a + b + c; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b6..f61d623b1ce8d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ +__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +__msg("frame 0: propagating r1,r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r1, r4 are always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r1, r4 are always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 04fc2c6c79e89..e4c729768b7d8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -53,6 +53,8 @@ #define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 +#define MAX_PACKET_OFF 0xffff + #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) @@ -183,63 +185,76 @@ static __always_inline __u32 tcp_time_stamp_raw(void) } struct tcpopt_context { - __u8 *ptr; - __u8 *end; + void *data; void *data_end; __be32 *tsecr; __u8 wscale; bool option_timestamp; bool option_sack; + __u32 off; }; -static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) { - __u8 opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr >= ctx->end) - return 1; - if (ctx->ptr >= ctx->data_end) - return 1; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; - opcode = ctx->ptr[0]; + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - if (opcode == TCPOPT_EOL) - return 1; - if (opcode == TCPOPT_NOP) { - ++ctx->ptr; - return 0; - } + ctx->off += sz; + return data; +} - if (ctx->ptr + 1 >= ctx->end) - return 1; - if (ctx->ptr + 1 >= ctx->data_end) +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; + + opcode = next(ctx, 1); + if (!opcode) return 1; - opsize = ctx->ptr[1]; - if (opsize < 2) + + if (*opcode == TCPOPT_EOL) return 1; + if (*opcode == TCPOPT_NOP) + return 0; - if (ctx->ptr + opsize > ctx->end) + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) return 1; - switch (opcode) { + switch (*opcode) { case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) - ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { ctx->option_timestamp = true; /* Client's tsval becomes our tsecr. */ - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + *ctx->tsecr = get_unaligned((__be32 *)tsecr); } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM) + if (*opsize == TCPOLEN_SACK_PERM) ctx->option_sack = true; break; } - ctx->ptr += opsize; + ctx->off = off + *opsize; return 0; } @@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context) static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data_end) + __be32 *tsecr, void *data, void *data_end) { struct tcpopt_context loop_ctx = { - .ptr = (__u8 *)(tcp_header + 1), - .end = (__u8 *)tcp_header + tcp_len, + .data = data, .data_end = data_end, .tsecr = tsecr, .wscale = TS_OPT_WSCALE_MASK, .option_timestamp = false, .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. + * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. + */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, }; u32 cookie; @@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, cookie = (__u32)value; if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data_end)) + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) tsopt = tsopt_buf; /* Check that there is enough space for a SYNACK. It also covers diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index c54d1697f439a..d508486cc0bdc 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -162,7 +162,7 @@ prio_arp() local mode=$1 for primary_reselect in 0 1 2; do - prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" + prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect" done } @@ -178,7 +178,7 @@ prio_ns() fi for primary_reselect in 0 1 2; do - prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" + prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect" done } @@ -194,9 +194,9 @@ prio() for mode in $modes; do prio_miimon $mode - prio_arp $mode - prio_ns $mode done + prio_arp "active-backup" + prio_ns "active-backup" } arp_validate_test() diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 6091b45d226ba..79b65bdf05db6 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=120 +timeout=1200 diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 1b08e042cf942..185b02d2d4cd1 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -269,6 +269,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` + ifconfig $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -430,6 +431,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -487,6 +489,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -543,6 +546,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "destroy NIC" @@ -572,6 +576,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -632,6 +637,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -687,6 +693,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -746,6 +753,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -876,6 +884,7 @@ msg="re-add a port" echo 2 > $NSIM_DEV_SYS/del_port echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` check_tables msg="replace VxLAN in overflow table" diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c index 5b354c209e936..894d28c3dd478 100644 --- a/tools/testing/selftests/mm/hugepage-vmemmap.c +++ b/tools/testing/selftests/mm/hugepage-vmemmap.c @@ -10,10 +10,7 @@ #include #include #include - -#define MAP_LENGTH (2UL * 1024 * 1024) - -#define PAGE_SIZE 4096 +#include "vm_util.h" #define PAGE_COMPOUND_HEAD (1UL << 15) #define PAGE_COMPOUND_TAIL (1UL << 16) @@ -39,6 +36,9 @@ #define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) #endif +static size_t pagesize; +static size_t maplength; + static void write_bytes(char *addr, size_t length) { unsigned long i; @@ -56,7 +56,7 @@ static unsigned long virt_to_pfn(void *addr) if (fd < 0) return -1UL; - lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET); + lseek(fd, (unsigned long)addr / pagesize * sizeof(pagemap), SEEK_SET); read(fd, &pagemap, sizeof(pagemap)); close(fd); @@ -86,7 +86,7 @@ static int check_page_flags(unsigned long pfn) * this also verifies kernel has correctly set the fake page_head to tail * while hugetlb_free_vmemmap is enabled. */ - for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) { + for (i = 1; i < maplength / pagesize; i++) { read(fd, &pageflags, sizeof(pageflags)); if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS || (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) { @@ -106,18 +106,25 @@ int main(int argc, char **argv) void *addr; unsigned long pfn; - addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + pagesize = psize(); + maplength = default_huge_page_size(); + if (!maplength) { + printf("Unable to determine huge page size\n"); + exit(1); + } + + addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); } /* Trigger allocation of HugeTLB page. */ - write_bytes(addr, MAP_LENGTH); + write_bytes(addr, maplength); pfn = virt_to_pfn(addr); if (pfn == -1UL) { - munmap(addr, MAP_LENGTH); + munmap(addr, maplength); perror("virt_to_pfn"); exit(1); } @@ -125,13 +132,13 @@ int main(int argc, char **argv) printf("Returned address is %p whose pfn is %lx\n", addr, pfn); if (check_page_flags(pfn) < 0) { - munmap(addr, MAP_LENGTH); + munmap(addr, maplength); perror("check_page_flags"); exit(1); } /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ - if (munmap(addr, MAP_LENGTH)) { + if (munmap(addr, maplength)) { perror("munmap"); exit(1); } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 8da562a9ae87e..19ff750516609 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,5 +1,6 @@ CONFIG_USER_NS=y CONFIG_NET_NS=y +CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y @@ -14,9 +15,13 @@ CONFIG_VETH=y CONFIG_NET_IPVTI=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y CONFIG_IFB=y +CONFIG_INET_DIAG=y +CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -25,15 +30,36 @@ CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m CONFIG_IP_NF_NAT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_L2TP_ETH=m +CONFIG_L2TP_IP=m +CONFIG_L2TP=m +CONFIG_L2TP_V3=y +CONFIG_MACSEC=m +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MPLS=y +CONFIG_MPTCP=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NET_ACT_GACT=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_PSAMPLE=m +CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TLS=m CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m @@ -48,7 +74,9 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_TUN=y CONFIG_VXLAN=m CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y +CONFIG_XFRM_INTERFACE=m diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index a26c5624429fb..4287a85298907 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 readonly ksft_skip=4 @@ -33,6 +33,10 @@ chk_rps() { rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus) printf "%-60s" "$msg" + + # In case there is more than 32 CPUs we need to remove commas from masks + rps_mask=${rps_mask//,} + expected_rps_mask=${expected_rps_mask//,} if [ $rps_mask -eq $expected_rps_mask ]; then echo "[ ok ]" else diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index a148181641026..e9fa14e107322 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -3,19 +3,16 @@ #define _GNU_SOURCE #include +#include + #include #include #include #include "../kselftest_harness.h" -#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ -#define NR_SERVER self->nproc -#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) - FIXTURE(so_incoming_cpu) { - int nproc; int *servers; union { struct sockaddr addr; @@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) .when_to_set = AFTER_ALL_LISTEN, }; +static void write_sysctl(struct __test_metadata *_metadata, + char *filename, char *string) +{ + int fd, len, ret; + + fd = open(filename, O_WRONLY); + ASSERT_NE(fd, -1); + + len = strlen(string); + ret = write(fd, string, len); + ASSERT_EQ(ret, len); +} + +static void setup_netns(struct __test_metadata *_metadata) +{ + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set lo up"), 0); + + write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001"); + write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0"); +} + +#define NR_PORT (60001 - 10000 - 1) +#define NR_CLIENT_PER_SERVER_DEFAULT 32 +static int nr_client_per_server, nr_server, nr_client; + FIXTURE_SETUP(so_incoming_cpu) { - self->nproc = get_nprocs(); - ASSERT_LE(2, self->nproc); + setup_netns(_metadata); + + nr_server = get_nprocs(); + ASSERT_LE(2, nr_server); + + if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT) + nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT; + else + nr_client_per_server = NR_PORT / nr_server; + + nr_client = nr_client_per_server * nr_server; - self->servers = malloc(sizeof(int) * NR_SERVER); + self->servers = malloc(sizeof(int) * nr_server); ASSERT_NE(self->servers, NULL); self->in_addr.sin_family = AF_INET; @@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu) { int i; - for (i = 0; i < NR_SERVER; i++) + for (i = 0; i < nr_server; i++) close(self->servers[i]); free(self->servers); @@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata, if (variant->when_to_set == BEFORE_LISTEN) set_so_incoming_cpu(_metadata, fd, cpu); - /* We don't use CLIENT_PER_SERVER here not to block + /* We don't use nr_client_per_server here not to block * this test at connect() if SO_INCOMING_CPU is broken. */ - ret = listen(fd, NR_CLIENT); + ret = listen(fd, nr_client); ASSERT_EQ(ret, 0); if (variant->when_to_set == AFTER_LISTEN) @@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata, { int i, ret; - for (i = 0; i < NR_SERVER; i++) { + for (i = 0; i < nr_server; i++) { self->servers[i] = create_server(_metadata, self, variant, i); if (i == 0) { @@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata, } if (variant->when_to_set == AFTER_ALL_LISTEN) { - for (i = 0; i < NR_SERVER; i++) + for (i = 0; i < nr_server; i++) set_so_incoming_cpu(_metadata, self->servers[i], i); } } @@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata, cpu_set_t cpu_set; int i, j, fd, ret; - for (i = 0; i < NR_SERVER; i++) { + for (i = 0; i < nr_server; i++) { CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata, ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); ASSERT_EQ(ret, 0); - for (j = 0; j < CLIENT_PER_SERVER; j++) { + for (j = 0; j < nr_client_per_server; j++) { fd = socket(AF_INET, SOCK_STREAM, 0); ASSERT_NE(fd, -1); @@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata, int i, j, fd, cpu, ret, total = 0; socklen_t len = sizeof(int); - for (i = 0; i < NR_SERVER; i++) { - for (j = 0; j < CLIENT_PER_SERVER; j++) { + for (i = 0; i < nr_server; i++) { + for (j = 0; j < nr_client_per_server; j++) { /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ fd = accept(self->servers[i], &self->addr, &self->addrlen); ASSERT_NE(fd, -1); @@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata, } } - ASSERT_EQ(total, NR_CLIENT); + ASSERT_EQ(total, nr_client); TH_LOG("SO_INCOMING_CPU is very likely to be " "working correctly with %d sockets.", total); }